defaulting variables to avoir Perl warnings
[koha.git] / z3950 / processz3950queue
index 2d0feee..8ab2ada 100755 (executable)
@@ -10,6 +10,83 @@ use C4::Output;
 use C4::Breeding;
 use Net::Z3950;
 
+=head1 NAME
+
+processz3950queue. The script that does z3950 searches.
+
+=head1 SYNOPSIS
+
+This script can be used on a console (as normal user) or by the daemon-launch script.
+
+Don't forget to EXPORT PERL5LIB=/PATH/to/KOHA before executing it if you use console mode.
+
+It :
+1- extracts z3950 requests from z3950queue table,
+2- creates entries in z3950results to store the result
+3- launch z3950 queries in asynchronous mode, using Unix fork()
+4- store results in marc_breeding table.
+
+The z3950 results queries are managed in z3950/search.pl script (poped-up window in MARC editor).
+
+=head1 DESCRIPTION
+
+=head2 table z3950servers
+
+This table stores the differents z3950 servers.
+A server is used if checked=1. The rank is NOT used as searches are now asynchronous.
+
+=head2 table z3950queue
+
+Table use to manage queries. A single line is created  in this table for each z3950 search request.
+If more than 1 server is called, the C<servers> field containt all of them separated by |.
+
+z3950 search requests are done by z3950/search.pl script.
+At this stage, the fields are created with C<startdate> and C<done> empty
+
+Then, the processz3950queue finds this entry and :
+1- store date (time()) in C<startdate>
+2- set C<done> = -1
+
+when the requests are all sent :
+2- set C<done> = 1
+3- set C<enddate> (FIXME: always equal to startdate for me)
+
+entries are deleted when :
+- C<startdate> is more than 1 day ago.
+
+FIXME:
+- results, numrecords fields are unused
+
+=head2 table z3950results
+
+1 entry is created for each request, for each server called.
+when created :
+* C<startdate> is filled
+* C<enddate> is null
+* active is set to 0. when active is 0, it means the request has not been sent. when set to 1, it means it's on the way.
+
+When a search is ended, C<enddate> is set, and C<active> is set to -1
+
+=head1 How it's written
+
+on every loop :
+* delete old queries
+* for each entry in z3950queue table that is not done=1 {
+       for each search request {
+               for each server {
+                       try to connect
+                       look for results
+                       *      results can be :
+                       - existing and already running on another process (active=1)
+                       - existing & finished (active=-1)
+                       - non existent => create it and run the request.
+               }
+       }
+}
+=over 2
+
+=cut
+
 
 if ($< == 0) {
     # Running as root, switch privs
@@ -19,7 +96,7 @@ if ($< == 0) {
        close PID;
     }
     # Get real apacheuser from koha.conf or reparsing httpd.conf
-    my $apacheuser='paul';
+    my $apacheuser=C4::Context->config("httpduser");
     my $uid=0;
     unless ($uid = (getpwnam($apacheuser))[2]) {
        die "Attempt to run daemon as non-existent or superuser\n";
@@ -27,9 +104,15 @@ if ($< == 0) {
     $>=$uid;
     $<=$uid;
 }
-my $dbh = C4::Context->dbh;
+my $db_driver = $context->{"config"}{"db_scheme"} || "mysql";
+my $db_name   = $context->{"config"}{"database"};
+my $db_host   = $context->{"config"}{"hostname"};
+my $db_user   = $context->{"config"}{"user"};
+my $db_passwd = $context->{"config"}{"pass"};
+my $dbh = DBI->connect("DBI:$db_driver:$db_name:$db_host",$db_user, $db_passwd);
 
-my $sth=$dbh->prepare("update z3950results set active=0");
+# we begin the script, so "unactive" every pending request : they will never give anything, the script died :-(
+my $sth=$dbh->prepare("update z3950results set active=0 where active<>-1");
 $sth->execute;
 $sth->finish;
 $SIG{CHLD}='reap';
@@ -51,123 +134,145 @@ while (1) {
        if ((time-$lastrun)>5) {
                print "starting loop\n";
                $checkqueue = 1; # FIXME during testing, this line forces the loop. REMOVE it to use SIG{HUP} when "daemonized" !
+# clean DB
+               my $now = time();
+               # delete z3950queue entries that are more than 1 day old
+               my $sth = $dbh->prepare("delete from z3950queue where ?-startdate > 86400");
+               $sth->execute($now);
+               # delete z3950results queries that are more than 1 hour old
+               $sth = $dbh->prepare("delete from z3950results where ?-startdate > 3600");
+               $sth->execute($now);
                if ($checkqueue) { # everytime a SIG{HUP} is recieved
                        $checkqueue=0;
-                       my $sth=$dbh->prepare("select id,term,type,servers,identifier from z3950queue order by id");
+# parse every entry in QUEUE
+                       $sth=$dbh->prepare("select id,term,type,servers,identifier from z3950queue where done<>1 or done is null order by id");
                        $sth->execute;
                        while (my ($id, $term, $type, $servers,$random) = $sth->fetchrow) {
+# FIXME: there is no "else". So, if more than 12 requests at the same time => requests are lost !
                                if ($forkcounter<12) {
                                        my $now=time();
-                                       my $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=$id");
-                                       ($stk->execute) || (next);
+# search for results entries for this request
+                                       my $stk=$dbh->prepare("select id,server,startdate,enddate,numrecords,active from z3950results where queryid=?");
+                                       ($stk->execute($id)) || (next);
                                        my %serverdone;
+# if no results => set queue to done = -1, set startdate and begin creating z3950results table entries & z3950 queries
                                        unless ($stk->rows) {
-                                               my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=$now where id=$id");
-                                               $sti->execute;
+                                               my $sti=$dbh->prepare("update z3950queue set done=-1,startdate=? where id=?");
+                                               $sti->execute($now,$id);
                                        }
+# check which servers calls have already been created (before a crash)
                                        while (my ($r_id, $r_server,$r_startdate,$r_enddate,$r_numrecords,$active) = $stk->fetchrow) {
-                                               if ($r_enddate >0) {
+                                               if ($r_enddate >0) { # result entry exist & finished
                                                        $serverdone{$r_server}=1;
-                                               } elsif ($active) {
+                                               } elsif ($active) { # result entry exists & on the way (active=1) or already done (active=-1)
                                                        $serverdone{$r_server}=1;
-                                               } else {
+                                               } else { # otherwise
                                                        $serverdone{$r_server}=-1;
                                                }
+                                               # note that is the entry doesn't exist, the $serverdone{$r_server} is 0 (important later !)
                                        }
-
                                        $stk->finish;
-                                       my $attr='';
-                                       if ($type eq 'isbn') {
-                                               $attr='1=7';
-                                       } elsif ($type eq 'title') {
-                                               $attr='1=4';
-                                       } elsif ($type eq 'author') {
-                                               $attr='1=1003';
-                                       } elsif ($type eq 'lccn') {
-                                               $attr='1=9';
-                                       } elsif ($type eq 'keyword') {
-                                               $attr='1=1016';
-                                       }
-                                       $term='"'.$term.'"';
-                                       my $query="\@attr $attr $term";
-                                       my $totalrecords=0;
-                                       my $serverinfo;
-                                       my $stillprocessing=0;
-                                       my $globalname;
-                                       my $globalsyntax;
-                                       my $globalencoding;
-                                       foreach $serverinfo (split(/\s+/, $servers)) {
-                                               (next) if ($serverdone{$serverinfo} == 1);
-                                               my $stillprocessing=1;
+                                       foreach my $serverinfo (split(/\|/, $servers)) {
+                                               (next) if ($serverdone{$serverinfo} == 1); #(otherwise, is 0 or -1)
+                                               my $totalrecords=0;
+                                               my $globalname;
+                                               my $globalsyntax;
+                                               my $globalencoding;
+# fork a process for this z3950 query
                                                if (my $pid=fork()) {
                                                        $forkcounter++;
                                                } else {
-                                                       my $dbi = C4::Context->dbh;
+# and connect to z3950 server
+# new connection required ($dbi) because we might have forked (thanks Harry Jackson)
+                                                       my $db_driver = $context->{"config"}{"db_scheme"} || "mysql";
+                                                       my $db_name   = $context->{"config"}{"database"};
+                                                       my $db_host   = $context->{"config"}{"hostname"};
+                                                       my $db_user   = $context->{"config"}{"user"};
+                                                       my $db_passwd = $context->{"config"}{"pass"};
+                                                       my $dbi = DBI->connect("DBI:$db_driver:$db_name:$db_host",$db_user, $db_passwd);
                                                        my ($name, $server, $database, $user, $password,$syntax) = split(/\//, $serverinfo, 6);
                                                        $globalname=$name;
                                                        $globalsyntax = $syntax;
                                                        $server=~/(.*)\:(\d+)/;
                                                        my $servername=$1;
                                                        my $port=$2;
-                                                       print "Processing $type=$term at $name $server $database $syntax (".($forkcounter+1)." forks)\n";
-                                                       $now=time();
-                                                       my $q_serverinfo=$dbi->quote($serverinfo);
-                                                       my $resultsid;
-                                                       if ($serverdone{$serverinfo}==-1) {
-                                                               my $stj=$dbi->prepare("select id from z3950results where server=$q_serverinfo and queryid=$id");
-                                                               $stj->execute;
-                                                               ($resultsid) = $stj->fetchrow;
-                                                               $stj->finish;
-                                                       } else {
-                                                               my $stj=$dbi->prepare("select id from z3950results where server=$q_serverinfo and queryid=$id");
-                                                               $stj->execute;
-                                                               ($resultsid) = $stj->fetchrow;
-                                                               $stj->finish;
-                                                               unless ($resultsid) {
-                                                                       $stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values ($q_serverinfo, $id, $now)");
-                                                                       $stj->execute;
-                                                                       $resultsid=$dbi->{'mysql_insertid'};
-                                                                       $stj->finish;
-                                                               }
+                                                       my $attr='';
+                                                       if ($type eq 'isbn') {
+                                                               $attr='1=7';
+                                                       } elsif ($type eq 'title') {
+                                                               $attr='1=4';
+                                                       } elsif ($type eq 'author') {
+                                                               $attr='1=1003';
+                                                       } elsif ($type eq 'lccn') {
+                                                               $attr='1=9';
+                                                       } elsif ($type eq 'keyword') {
+                                                               $attr='1=1016';
                                                        }
-                                                       my $stj=$dbh->prepare("update z3950results set active=1 where id=$resultsid");
-                                                       $stj->execute;
+                                                       my $query="\@attr $attr \"$term\"";
+                                                       print "$$/$id : Processing $type=$term at $name $server $database $syntax (".($forkcounter+1)." forks)\n";
+# try to connect
                                                        my $conn;
                                                        my $noconnection=0;
                                                        my $error=0;
+# the z3950 query is builded. Launch it.
                                                        if ($user) {
-                                                               eval { $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database, user => $user, password => $password); };
-                                                               if ($@) {
-                                                                       $noconnection=1;
-                                                               } else {
-                                                                       $error=pe();
-                                                               }
+                                                               $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database, user => $user, password => $password) || ($noconnection=1);
                                                        } else {
-                                                               eval { $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database); };
-                                                               if ($@) {
-                                                                       $noconnection=1;
-                                                               } else {
-                                                                       $error=pe();
-                                                               }
+                                                               $conn= new Net::Z3950::Connection($servername, $port, databaseName => $database) || ($noconnection=1);
                                                        }
                                                        if ($noconnection || $error) {
-                                                               warn "no connection at $globalname ";
+# if connection impossible, don't go further !
+                                                               print "$$/$id : no connection at $globalname\n";
+                                                               my $result = MARC::Record->new();
+                                                               my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($result,-1,"$globalname server is NOT responding","",$random);
                                                        } else {
-                                                               warn "$globalname ==> $globalsyntax";
+# else, build z3950 query and do it !
+                                                               $now=time();
+                                                               my $resultsid="";
+       # create z3950results entries.
+                                                               if ($serverdone{$serverinfo}==-1) { # if entry exist, just retrieve it
+                                                                       my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
+                                                                       $stj->execute($serverinfo,$id);
+                                                                       ($resultsid) = $stj->fetchrow;
+                                                                       $stj->finish;
+                                                                       print "$$/$id : 1 >> $resultsid\n";
+                                                               } else { # else create it : (may be serverdone=1 or 0)
+                                                                       my $stj=$dbi->prepare("select id from z3950results where server=? and queryid=?");
+                                                                       $stj->execute($serverinfo,$id);
+                                                                       ($resultsid) = $stj->fetchrow;
+                                                                       $stj->finish;
+                                                                       print "$$/$id : 2 >> $resultsid\n";
+                                                                       unless ($resultsid) {
+                                                                               $stj=$dbi->prepare("insert into z3950results (server, queryid, startdate) values (?,?,?)");
+                                                                               $stj->execute($serverinfo, $id, $now);
+                                                                               $resultsid=$dbi->{'mysql_insertid'};
+                                                                               $stj->finish;
+                                                                               print "$$/$id : creating and ";
+                                                                       }
+                                                               }
+                                                               print "$$/$id : working on results entry $resultsid\n";
+       # set active to 1 => this request is on the way.
+                                                               my $stj=$dbi->prepare("update z3950results set active=1 where id=?");
+                                                               $stj->execute($resultsid);
+#######
+                                                               print "$$/$id : connected to $globalname\n";
                                                                eval {$conn->option(elementSetName => 'F')};
                                                                eval { $conn->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::USMARC);} if ($globalsyntax eq "USMARC");
                                                                eval { $conn->option(preferredRecordSyntax => Net::Z3950::RecordSyntax::UNIMARC);} if ($globalsyntax eq "UNIMARC");
                                                                if ($@) {
-                                                                       print "$globalname ERROR: $@\n";
+                                                                       print "$$/$id : $globalname ERROR: $@ for $resultsid\n";
+       # in case pb during connexion, set result to "empty" to avoid everlasting loops
+                                                                       my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
+                                                                       $stj->execute(0,0,$now,$resultsid);
                                                                } else {
-#                                                                      print "Q: $query\n";
                                                                        my $rs=$conn->search($query);
                                                                        pe();
+       # we have an answer for a query => get results & store them in marc_breeding table
                                                                        my $numresults=$rs->size();
                                                                        if ($numresults eq 0) {
-                                                                               warn "$globalname ==> answered : no records found";
+                                                                               print "$$/$id : $globalname : no records found\n";
                                                                        } else {
-                                                                               warn "$globalname ==> answered : $numresults found";
+                                                                               print "$$/$id : $globalname : $numresults records found, retrieving them (max 80)\n";
                                                                        }
                                                                        pe();
                                                                        my $i;
@@ -197,82 +302,35 @@ while (1) {
                                                                        my $elapsed=$scantimerend-$scantimerstart;
                                                                        if ($elapsed) {
                                                                                my $speed=int($numresults/$elapsed*100)/100;
-                                                                               print "$globalname   SPEED: $speed  $server done $numrecords\n";
+                                                                               print "$$/$id : $globalname : $server records retrieved $numrecords SPEED: $speed\n";
                                                                        }
                                                                        my $q_result=$dbi->quote($result);
                                                                        ($q_result) || ($q_result='""');
                                                                        $now=time();
                                                                        if ($numresults >0) {
-                                                                               my $task="update z3950results set numrecords=$numresults,numdownloaded=$numrecords,highestseen=0,results=$q_result,enddate=$now where id=$resultsid";
-                                                                               my $stj=$dbi->prepare($task);
-                                                                               $stj->execute;
+                                                                               my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results=?,enddate=? where id=?");
+                                                                               $stj->execute($numresults,$numrecords,$q_result,$now,$resultsid);
                                                                        } else { # no results...
-                                                                               my $task="update z3950results set numrecords=$numresults,numdownloaded=$numrecords,highestseen=0,results='',enddate=$now where id=$resultsid";
-                                                                               my $stj=$dbi->prepare($task);
-                                                                               $stj->execute;
-                                                                       }
-                                                                       my $counter=0;
-                                                                       while ($counter<60 && $numrecords<$numresults) {
-                                                                               $counter++;
-                                                                               my $stj=$dbi->prepare("select highestseen from z3950results where id=$resultsid");
-                                                                               $stj->execute;
-                                                                               my ($highestseen) = $stj->fetchrow;
-                                                                               if ($highestseen>($numrecords-30)) {
-                                                                                       $counter=0;
-                                                                                       print "   $server rescanning\n";
-                                                                                       my $scantimerstart=time();
-                                                                                       for ($i=$numrecords+1; $i<=(($numresults<($numrecords+40)) ? ($numresults) : ($numrecords+40)); $i++) {
-                                                                                               my $rec=$rs->record($i);
-                                                                                               my $marcdata=$rec->rawdata();
-                                                                                               $result.=$marcdata;
-                                                                                       }
-                                                                                       my $scantimerend=time();
-                                                                                       ($numresults<$numrecords+40) ? ($numrecords=$numresults) : ($numrecords += 40);
-                                                                                       my $elapsed=$scantimerend-$scantimerstart;
-                                                                                       if ($elapsed) {
-                                                                                               my $speed=int($numresults/$elapsed*100)/100;
-                                                                                               print "  SPEED: $speed  $server done $numrecords\n";
-                                                                                       }
-
-                                                                                       my $q_result=$dbi->quote($result);
-                                                                                       ($q_result) || ($q_result='""');
-                                                                                       $now=time();
-                                                                                       my $task="update z3950results set numdownloaded=$numrecords,results=$q_result where id=$resultsid";
-                                                                                       my $stj=$dbi->prepare($task);
-                                                                                       $stj->execute;
-                                                                               }
-                                                                               sleep 5;
+                                                                               my $stj=$dbi->prepare("update z3950results set numrecords=?,numdownloaded=?,highestseen=0,results='',enddate=? where id=?");
+                                                                               $stj->execute($numresults,$numrecords,$now,$resultsid);
                                                                        }
                                                                }
+                                                               $stj=$dbi->prepare("update z3950results set active=-1 where id=?");
+                                                               $stj->execute($resultsid);
+                                                               eval {my $stj->finish};
                                                        }
-                                                       # FIXME - There's already a $stj in this scope
-                                                       my $stj=$dbi->prepare("update z3950results set active=0 where id=$resultsid");
-                                                       $stj->execute;
-                                                       eval {$stj->finish};
-                                                       print "    $server done.\n";
+#OK, the search is done inactivate it..
+                                                       print "$$/$id : $server search done.\n";
                                                        exit;
-sub pe {
-       return 0;
-       my $code=$conn->errcode();
-       my $msg=$conn->errmsg();
-       my $ai=$conn->addinfo();
-       print << "EOF";
-CODE:  $code
-MSG:   $msg
-ADDTL: $ai
-EOF
-       return 0;
-}
                                                }
                                        }
-                                       unless ($stillprocessing) {
-                                               #my $sti=$dbh->prepare("select enddate from z3950queue where id=$id");
-                                               #$sti->execute;
-                                               #my ($enddate) = $sti->fetchrow;
-                                               #unless ($enddate) {
-                                       }
                                } else {
+# $forkcounter >=12
                                }
+                               # delete z3950queue entry, as everything is done
+                               my $sti=$dbh->prepare("update z3950queue set done=1,enddate=? where id=?");
+                               $now=time();
+                               $sti->execute($now,$id);
                        }
                        $lastrun=time();
                }
@@ -291,3 +349,6 @@ sub checkqueue {
 }
 
 
+sub pe {
+       return 0;
+}