* A failed full dump is now saved as a partial (incomplete) dump,
[BackupPC.git] / bin / BackupPC
index e13b60d..027bbf2 100755 (executable)
@@ -29,7 +29,7 @@
 #   Craig Barratt  <cbarratt@users.sourceforge.net>
 #
 # COPYRIGHT
-#   Copyright (C) 2001  Craig Barratt
+#   Copyright (C) 2001-2003  Craig Barratt
 #
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
 #
 #========================================================================
 #
-# Version 2.0.0_CVS, released 18 Jan 2003.
+# Version 2.1.0_CVS, released 3 Jul 2003.
 #
 # See http://backuppc.sourceforge.net.
 #
 #========================================================================
 
 use strict;
+no  utf8;
 use vars qw(%Status %Info $Hosts);
 use lib "/usr/local/BackupPC/lib";
 use BackupPC::Lib;
@@ -70,8 +71,7 @@ use Digest::MD5;
 # Handle command line options
 ###########################################################################
 my %opts;
-getopts("d", \%opts);
-if ( @ARGV != 0 ) {
+if ( !getopts("d", \%opts) || @ARGV != 0 ) {
     print("usage: $0 [-d]\n");
     exit(1);
 }
@@ -249,8 +249,8 @@ sub Main_Initialize
         exit(1);
     }
     foreach my $progName ( qw(SmbClientPath NmbLookupPath PingPath DfPath
-                              SendmailPath) ) {
-        next if ( !defined($Conf{$progName}) || -x $Conf{$progName} );
+                              SendmailPath SshPath) ) {
+        next if ( $Conf{$progName} eq "" || -x $Conf{$progName} );
         print(STDERR $bpc->timeStamp,
                      "\$Conf{$progName} = '$Conf{$progName}' is not a"
                    . " valid executable program\n");
@@ -320,6 +320,10 @@ sub Main_Initialize
         }
         $Status{$host}{activeJob} = 0;
     }
+    foreach my $host ( sort(keys(%Status)) ) {
+        next if ( defined($Hosts->{$host}) );
+       delete($Status{$host});
+    }
 
     #
     # Write out our initial status and save our PID
@@ -561,9 +565,12 @@ sub Main_TryToRun_Bg_or_User_Queue
         vec($FDread, $Jobs{$host}{fn}, 1) = 1;
         $Jobs{$host}{startTime}  = time;
         $Jobs{$host}{reqTime}    = $req->{reqTime};
+        $Jobs{$host}{userReq}    = $req->{userReq};
         $Jobs{$host}{cmd}        = join(" ", $progName, @args);
         $Jobs{$host}{user}       = $user;
         $Jobs{$host}{type}       = $type;
+       $Status{$host}{userReq}  = $req->{userReq}
+                                       if ( defined($Hosts->{$host}) );
         if ( !$req->{dhcp} ) {
             $Status{$host}{state}     = "Status_".$type."_starting";
             $Status{$host}{activeJob} = 1;
@@ -655,9 +662,16 @@ sub Main_Process_Signal
         $NextWakeup = 0;
     } elsif ( $SigName ) {
         print(LOG $bpc->timeStamp, "Got signal $SigName... cleaning up\n");
-        foreach my $host ( keys(%Jobs) ) {
-            kill(2, $Jobs{$host}{pid});
-        }
+       if ( keys(%Jobs) ) {
+           foreach my $host ( keys(%Jobs) ) {
+               kill(2, $Jobs{$host}{pid});
+           }
+           sleep(1);
+           foreach my $host ( keys(%Jobs) ) {
+               kill(9, $Jobs{$host}{pid});
+           }
+           %Jobs = ();
+       }
         StatusWrite();
         unlink("$TopDir/log/BackupPC.pid");
         exit(1);
@@ -797,16 +811,11 @@ sub Main_Check_Job_Messages
                 } else {
                     print(LOG $bpc->timeStamp, "dhcp $host: $mesg\n");
                 }
-            } elsif ( $mesg =~ /^started (.*) dump, pid=(-?\d+), tarPid=(-?\d+), share=(.*)/ ) {
+            } elsif ( $mesg =~ /^started (.*) dump, share=(.*)/ ) {
                 $Jobs{$host}{type}      = $1;
-                $Jobs{$host}{xferPid}   = $2;
-                $Jobs{$host}{tarPid}    = $3;
-                $Jobs{$host}{shareName} = $4;
+                $Jobs{$host}{shareName} = $2;
                 print(LOG $bpc->timeStamp,
-                          "Started $1 backup on $host"
-                          . " (pid=$Jobs{$host}{pid}, xferPid=$2",
-                          $Jobs{$host}{tarPid} > 0
-                                ? ", tarPid=$Jobs{$host}{tarPid}" : "",
+                          "Started $1 backup on $host (pid=$Jobs{$host}{pid}",
                           $Jobs{$host}{dhcpHostIP}
                                 ? ", dhcp=$Jobs{$host}{dhcpHostIP}" : "",
                           ", share=$Jobs{$host}{shareName})\n");
@@ -818,16 +827,13 @@ sub Main_Check_Job_Messages
                 $Status{$host}{aliveCnt}++;
                 $Status{$host}{dhcpCheckCnt}--
                                 if ( $Status{$host}{dhcpCheckCnt} > 0 );
-            } elsif ( $mesg =~ /^started_restore (\S+) (\S+)/ ) {
-                $Jobs{$host}{type}    = "restore";
+            } elsif ( $mesg =~ /^xferPids (.*)/ ) {
                 $Jobs{$host}{xferPid} = $1;
-                $Jobs{$host}{tarPid}  = $2;
+            } elsif ( $mesg =~ /^started_restore/ ) {
+                $Jobs{$host}{type}    = "restore";
                 print(LOG $bpc->timeStamp,
                           "Started restore on $host"
-                          . " (pid=$Jobs{$host}{pid}, xferPid=$2",
-                          $Jobs{$host}{tarPid} > 0
-                                ? ", tarPid=$Jobs{$host}{tarPid}" : "",
-                          ")\n");
+                          . " (pid=$Jobs{$host}{pid})\n");
                 $Status{$host}{state}     = "Status_restore_in_progress";
                 $Status{$host}{reason}    = "";
                 $Status{$host}{type}      = "restore";
@@ -847,16 +853,23 @@ sub Main_Check_Job_Messages
                 delete($Status{$host}{errorTime});
                 $Status{$host}{endTime}   = time;
             } elsif ( $mesg =~ /^nothing to do/ ) {
-                $Status{$host}{state}     = "Status_idle";
-                $Status{$host}{reason}    = "Reason_nothing_to_do";
-                $Status{$host}{startTime} = time;
+               if ( $Status{$host}{reason} ne "Reason_backup_failed"
+                       && $Status{$host}{reason} ne "Reason_restore_failed" ) {
+                   $Status{$host}{state}     = "Status_idle";
+                   $Status{$host}{reason}    = "Reason_nothing_to_do";
+                   $Status{$host}{startTime} = time;
+               }
                 $Status{$host}{dhcpCheckCnt}--
                                 if ( $Status{$host}{dhcpCheckCnt} > 0 );
             } elsif ( $mesg =~ /^no ping response/
-                            || $mesg =~ /^ping too slow/ ) {
+                            || $mesg =~ /^ping too slow/
+                            || $mesg =~ /^host not found/ ) {
                 $Status{$host}{state}     = "Status_idle";
-                if ( $Status{$host}{reason} ne "Reason_backup_failed" ) {
+                if ( $Status{$host}{userReq}
+                       || $Status{$host}{reason} ne "Reason_backup_failed"
+                       || $Status{$host}{error} =~ /^aborted by user/ ) {
                     $Status{$host}{reason}    = "Reason_no_ping";
+                   $Status{$host}{error}     = $mesg;
                     $Status{$host}{startTime} = time;
                 }
                 $Status{$host}{deadCnt}++;
@@ -869,7 +882,14 @@ sub Main_Check_Job_Messages
                 $Status{$host}{error}     = $1;
                 $Status{$host}{errorTime} = time;
                 $Status{$host}{endTime}   = time;
-                print(LOG $bpc->timeStamp, "backup failed on $host ($1)\n");
+                print(LOG $bpc->timeStamp, "Backup failed on $host ($1)\n");
+            } elsif ( $mesg =~ /^restore failed: (.*)/ ) {
+                $Status{$host}{state}     = "Status_idle";
+                $Status{$host}{reason}    = "Reason_restore_failed";
+                $Status{$host}{error}     = $1;
+                $Status{$host}{errorTime} = time;
+                $Status{$host}{endTime}   = time;
+                print(LOG $bpc->timeStamp, "Restore failed on $host ($1)\n");
             } elsif ( $mesg =~ /^log\s+(.*)/ ) {
                 print(LOG $bpc->timeStamp, "$1\n");
             } elsif ( $mesg =~ /^BackupPC_stats = (.*)/ ) {
@@ -1019,17 +1039,29 @@ sub Main_Check_Client_Messages
                 if ( $CmdJob ne $host && defined($Status{$host})
                                       && defined($Jobs{$host}) ) {
                     print(LOG $bpc->timeStamp,
-                               "Stopping current backup of $host,"
+                               "Stopping current $Jobs{$host}{type} of $host,"
                              . " request by $user (backoff=$backoff)\n");
                     kill(2, $Jobs{$host}{pid});
-                    vec($FDread, $Jobs{$host}{fn}, 1) = 0;
-                    close($Jobs{$host}{fh});
-                    delete($Jobs{$host});
-                    $Status{$host}{state}     = "Status_idle";
-                    $Status{$host}{reason}    = "Reason_backup_canceled_by_user"; #FIXME: user should be $user (we need to substitute the variable in the l10n stuff)
+                   #
+                   # Don't close the pipe now; wait until the child
+                   # really exits later.  Otherwise close() will
+                   # block until the child has exited.
+                   #  old code:
+                    ##vec($FDread, $Jobs{$host}{fn}, 1) = 0;
+                    ##close($Jobs{$host}{fh});
+                    ##delete($Jobs{$host});
+
+                    $Status{$host}{state}    = "Status_idle";
+                   if ( $Jobs{$host}{type} eq "restore" ) {
+                       $Status{$host}{reason}
+                                   = "Reason_restore_canceled_by_user";
+                   } else {
+                       $Status{$host}{reason}
+                                   = "Reason_backup_canceled_by_user";
+                   }
                     $Status{$host}{activeJob} = 0;
                     $Status{$host}{startTime} = time;
-                    $reply = "ok: backup of $host cancelled";
+                    $reply = "ok: $Jobs{$host}{type} of $host cancelled";
                 } elsif ( $BgQueueOn{$host} || $UserQueueOn{$host} ) {
                     print(LOG $bpc->timeStamp,
                                "Stopping pending backup of $host,"
@@ -1088,6 +1120,7 @@ sub Main_Check_Client_Messages
                                 user    => $user,
                                 reqTime => time,
                                 doFull  => $doFull,
+                                userReq => 1,
                                 dhcp    => $hostIP eq $host ? 0 : 1,
                         });
                     $UserQueueOn{$hostIP} = 1;
@@ -1116,6 +1149,7 @@ sub Main_Check_Client_Messages
                                 reqTime => time,
                                 dhcp    => 0,
                                 restore => 1,
+                               userReq => 1,
                         });
                     $UserQueueOn{$host} = 1;
                     if ( defined($Jobs{$host}) ) {
@@ -1416,8 +1450,9 @@ sub catch_signal
         print(LOG "Fatal error: unhandled signal $SigName\n");
         unlink("$TopDir/log/BackupPC.pid");
         confess("Got new signal $SigName... quitting\n");
+    } else {
+       $SigName = shift;
     }
-    $SigName = shift;
 }
 
 #