# DESCRIPTION
#
# BackupPC reads the configuration and status information from
-# $TopDir/conf. It then runs and manages all the backup activity.
+# $ConfDir/conf. It then runs and manages all the backup activity.
#
# As specified by $Conf{WakeupSchedule}, BackupPC wakes up periodically
# to queue backups on all the PCs. This is a four step process:
#
#========================================================================
#
-# Version 2.1.0, released 20 Jun 2004.
+# Version 3.0.0, released 28 Jan 2007.
#
# See http://backuppc.sourceforge.net.
#
use Socket;
use Carp;
use Digest::MD5;
+use POSIX qw(setsid);
###########################################################################
# Handle command line options
die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
+my $LogDir = $bpc->LogDir();
my %Conf = $bpc->Conf();
#
#
# Read old status
#
-if ( -f "$TopDir/log/status.pl" && !(my $ret = do "$TopDir/log/status.pl") ) {
-    die "couldn't parse $TopDir/log/status.pl: $@" if $@;
-    die "couldn't do $TopDir/log/status.pl: $!" unless defined $ret;
-    die "couldn't run $TopDir/log/status.pl";
+if ( -f "$LogDir/status.pl" && !(my $ret = do "$LogDir/status.pl") ) {
+    #
+    # A bad saved-status file is no longer fatal: warn on stderr and
+    # keep starting up without the old status instead of dying.
+    #
+    if ( $@ ) {
+        print STDERR "couldn't parse $LogDir/status.pl: $@";
+    } elsif ( !defined($ret) ) {
+        print STDERR "couldn't do $LogDir/status.pl: $!";
+    } else {
+        print STDERR "couldn't run $LogDir/status.pl";
+    }
}
#
if ( $opts{d} ) {
    #
-    # daemonize by forking
+    # daemonize by forking; more robust method per:
+    # http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=301057
    #
-    defined(my $pid = fork) or die "Can't fork: $!";
+    my $pid;
+    defined($pid = fork) or die("Can't fork: $!");
+    exit if( $pid ); # parent exits
+
+    POSIX::setsid();
+    defined($pid = fork) or die("Can't fork: $!");
    exit if $pid; # parent exits
+
+    chdir ("/") or die("Cannot chdir to /: $!\n");
+    close(STDIN);
+    # Reopen fd 0 on /dev/null in *read* mode: with ">" STDIN would be
+    # write-only, so any later read from stdin (including by spawned
+    # children that inherit fd 0) would fail instead of seeing EOF.
+    open(STDIN, "<", "/dev/null") or die("Cannot open /dev/null as stdin\n");
+    # STDOUT and STDERR are handled in LogFileOpen() right below,
+    # otherwise we would have to reopen them too.
}
#
# Write out our initial status and save our PID
#
StatusWrite();
- if ( open(PID, ">", "$TopDir/log/BackupPC.pid") ) {
+ unlink("$LogDir/BackupPC.pid");
+ if ( open(PID, ">", "$LogDir/BackupPC.pid") ) {
print(PID $$);
close(PID);
+ chmod(0444, "$LogDir/BackupPC.pid");
}
#
});
$CmdQueueOn{$bpc->trashJob} = 1;
}
- if ( keys(%Jobs) == $trashCleanRunning && $RunNightlyWhenIdle == 1 ) {
+ if ( $RunNightlyWhenIdle == 1 ) {
#
# Queue multiple nightly jobs based on the configuration
#
# Zero out the data we expect to get from BackupPC_nightly.
- # In the future if we want to split BackupPC_nightly over
- # more than one night we will only zero out the portion
- # that we are running right now.
#
for my $p ( qw(pool cpool) ) {
for ( my $i = $start ; $i < $end ; $i++ ) {
#
# Now queue the $Conf{MaxBackupPCNightlyJobs} jobs.
- # The granularity on start and end is now 0..256.
+ # The granularity on start and end is now 0..255.
#
$start *= 16;
$end *= 16;
$CmdQueueOn{$job} = 1;
}
$RunNightlyWhenIdle = 2;
-
}
}
sub Main_TryToRun_Bg_or_User_Queue
{
my($req, $host);
- while ( $RunNightlyWhenIdle == 0 ) {
+ my(@deferUserQueue, @deferBgQueue);
+ my $du;
+
+ if ( time - $Info{DUlastValueTime} >= 600 ) {
+ #
+ # Update our notion of disk usage no more than
+ # once every 10 minutes
+ #
+ $du = $bpc->CheckFileSystemUsage($TopDir);
+ $Info{DUlastValue} = $du;
+ $Info{DUlastValueTime} = time;
+ } else {
+ #
+ # if we recently checked it then just use the old value
+ #
+ $du = $Info{DUlastValue};
+ }
+ if ( $Info{DUDailyMaxReset} ) {
+ $Info{DUDailyMaxStartTime} = time;
+ $Info{DUDailyMaxReset} = 0;
+ $Info{DUDailyMax} = 0;
+ }
+ if ( $du > $Info{DUDailyMax} ) {
+ $Info{DUDailyMax} = $du;
+ $Info{DUDailyMaxTime} = time;
+ }
+ if ( $du > $Conf{DfMaxUsagePct} ) {
+ my @bgQueue = @BgQueue;
+ my $nSkip = 0;
+
+ #
+ # When the disk is too full, only run backups that will
+ # do expires, not regular backups
+ #
+ @BgQueue = ();
+ foreach $req ( @bgQueue ) {
+ if ( $req->{dumpExpire} ) {
+ unshift(@BgQueue, $req);
+ } else {
+ $BgQueueOn{$req->{host}} = 0;
+ $nSkip++;
+ }
+ }
+ if ( $nSkip ) {
+ print(LOG $bpc->timeStamp,
+ "Disk too full ($du%); skipped $nSkip hosts\n");
+ $Info{DUDailySkipHostCnt} += $nSkip;
+ }
+ }
+
+ #
+ # Run background jobs anytime. Previously they were locked out
+ # when BackupPC_nightly was running or pending with this
+ # condition on the while loop:
+ #
+ # while ( $RunNightlyWhenIdle == 0 )
+ #
+ while ( 1 ) {
local(*FH);
- my(@args, @deferUserQueue, @deferBgQueue, $progName, $type);
+ my(@args, $progName, $type);
my $nJobs = keys(%Jobs);
#
# CmdJob and trashClean don't count towards MaxBackups / MaxUserBackups
#
- $nJobs -= $BackupPCNightlyJobs if ( $CmdJob ne "" );
+ if ( $CmdJob ne "" ) {
+ if ( $BackupPCNightlyJobs ) {
+ $nJobs -= $BackupPCNightlyJobs;
+ } else {
+ $nJobs--;
+ }
+ }
$nJobs-- if ( defined($Jobs{$bpc->trashJob} ) );
if ( $nJobs < $Conf{MaxBackups} + $Conf{MaxUserBackups}
&& @UserQueue > 0 ) {
&& (@CmdQueue + $nJobs)
<= $Conf{MaxBackups} + $Conf{MaxPendingCmds}
&& @BgQueue > 0 ) {
- my $du;
- if ( time - $Info{DUlastValueTime} >= 60 ) {
- #
- # Update our notion of disk usage no more than
- # once every minute
- #
- $du = $bpc->CheckFileSystemUsage($TopDir);
- $Info{DUlastValue} = $du;
- $Info{DUlastValueTime} = time;
- } else {
- #
- # if we recently checked it then just use the old value
- #
- $du = $Info{DUlastValue};
- }
- if ( $Info{DUDailyMaxReset} ) {
- $Info{DUDailyMaxStartTime} = time;
- $Info{DUDailyMaxReset} = 0;
- $Info{DUDailyMax} = 0;
- }
- if ( $du > $Info{DUDailyMax} ) {
- $Info{DUDailyMax} = $du;
- $Info{DUDailyMaxTime} = time;
- }
- if ( $du > $Conf{DfMaxUsagePct} ) {
- my $nSkip = @BgQueue + @deferBgQueue;
- print(LOG $bpc->timeStamp,
- "Disk too full ($du%); skipping $nSkip hosts\n");
- $Info{DUDailySkipHostCnt} += $nSkip;
- @BgQueue = ();
- @deferBgQueue = ();
- %BgQueueOn = ();
- next;
- }
$req = pop(@BgQueue);
if ( defined($Jobs{$req->{host}}) ) {
- push(@deferBgQueue, $req);
+ #
+ # Job is currently running for this host; save it for later
+ #
+ unshift(@deferBgQueue, $req);
next;
}
$BgQueueOn{$req->{host}} = 0;
} else {
- while ( @deferBgQueue ) {
- push(@BgQueue, pop(@deferBgQueue));
- }
- while ( @deferUserQueue ) {
- push(@UserQueue, pop(@deferUserQueue));
- }
+ #
+ # Restore the deferred jobs
+ #
+ @BgQueue = (@BgQueue, @deferBgQueue);
+ @UserQueue = (@UserQueue, @deferUserQueue);
last;
}
$host = $req->{host};
$Info{DUDailySkipHostCntPrev} = $Info{DUDailySkipHostCnt};
$Info{DUDailySkipHostCnt} = 0;
my $lastLog = $Conf{MaxOldLogFiles} - 1;
- if ( -f "$TopDir/log/LOG.$lastLog" ) {
+ if ( -f "$LogDir/LOG.$lastLog" ) {
print(LOG $bpc->timeStamp,
- "Removing $TopDir/log/LOG.$lastLog\n");
- unlink("$TopDir/log/LOG.$lastLog");
+ "Removing $LogDir/LOG.$lastLog\n");
+ unlink("$LogDir/LOG.$lastLog");
}
- if ( -f "$TopDir/log/LOG.$lastLog.z" ) {
+ if ( -f "$LogDir/LOG.$lastLog.z" ) {
print(LOG $bpc->timeStamp,
- "Removing $TopDir/log/LOG.$lastLog.z\n");
- unlink("$TopDir/log/LOG.$lastLog.z");
+ "Removing $LogDir/LOG.$lastLog.z\n");
+ unlink("$LogDir/LOG.$lastLog.z");
}
print(LOG $bpc->timeStamp, "Aging LOG files, LOG -> LOG.0 -> "
. "LOG.1 -> ... -> LOG.$lastLog\n");
close(LOG);
for ( my $i = $lastLog - 1 ; $i >= 0 ; $i-- ) {
my $j = $i + 1;
- rename("$TopDir/log/LOG.$i", "$TopDir/log/LOG.$j")
- if ( -f "$TopDir/log/LOG.$i" );
- rename("$TopDir/log/LOG.$i.z", "$TopDir/log/LOG.$j.z")
- if ( -f "$TopDir/log/LOG.$i.z" );
+ rename("$LogDir/LOG.$i", "$LogDir/LOG.$j")
+ if ( -f "$LogDir/LOG.$i" );
+ rename("$LogDir/LOG.$i.z", "$LogDir/LOG.$j.z")
+ if ( -f "$LogDir/LOG.$i.z" );
}
#
# Compress the log file LOG -> LOG.0.z (if enabled).
# Otherwise, just rename LOG -> LOG.0.
#
- BackupPC::FileZIO->compressCopy("$TopDir/log/LOG",
- "$TopDir/log/LOG.0.z",
- "$TopDir/log/LOG.0",
+ BackupPC::FileZIO->compressCopy("$LogDir/LOG",
+ "$LogDir/LOG.0.z",
+ "$LogDir/LOG.0",
$Conf{CompressLevel}, 1);
LogFileOpen();
#
- # Remember to run nightly script after current jobs are done
+ # Remember to run the nightly script when the next CmdQueue
+ # job is done.
#
$RunNightlyWhenIdle = 1;
}
delete($Status{$host}{error});
delete($Status{$host}{errorTime});
$Status{$host}{endTime} = time;
+ $Status{$host}{lastGoodBackupTime} = time;
} elsif ( $mesg =~ /^backups disabled/ ) {
print(LOG $bpc->timeStamp,
"Ignoring old backup error on $host\n");
$Info{pool}{$f[0]}[$chunk]{FileCntRename} += $f[9];
$Info{pool}{$f[0]}[$chunk]{FileLinkMax} = $f[10]
if ( $Info{pool}{$f[0]}[$chunk]{FileLinkMax} < $f[10] );
+ $Info{pool}{$f[0]}[$chunk]{FileLinkTotal} += $f[11];
$Info{pool}{$f[0]}[$chunk]{Time} = time;
} elsif ( $mesg =~ /^BackupPC_nightly lock_off/ ) {
$BackupPCNightlyLock--;
if ( $BackupPCNightlyLock == 0 ) {
#
# This means the last BackupPC_nightly is done with
- # the pool clean, so it's to start running regular
- # backups again.
+ # the pool clean, so it's ok to start running regular
+ # backups again. But starting in 3.0 regular jobs
+ # are decoupled from BackupPC_nightly.
#
$RunNightlyWhenIdle = 0;
}
#print(LOG $bpc->timeStamp, "BackupPC_nightly done; now"
# . " have $BackupPCNightlyJobs running\n");
if ( $BackupPCNightlyJobs <= 0 ) {
+ #
+ # Last BackupPC_nightly has finished
+ #
$BackupPCNightlyJobs = 0;
$RunNightlyWhenIdle = 0;
$CmdJob = "";
"User $user requested backup of unknown host"
. " $host\n");
$reply = "error: unknown host $host";
- } elsif ( defined($Jobs{$host})
- && $Jobs{$host}{type} ne "restore" ) {
- print(LOG $bpc->timeStamp,
- "User $user requested backup of $host,"
- . " but one is currently running\n");
- $reply = "error: backup of $host is already running";
} else {
print(LOG $bpc->timeStamp,
"User $user requested backup of $host"
###########################################################################
#
-# Write the current status to $TopDir/log/status.pl
+# Write the current status to $LogDir/status.pl
#
sub StatusWrite
{
[ \%Info, \%Status],
[qw(*Info *Status)]);
$dump->Indent(1);
- if ( open(STATUS, ">", "$TopDir/log/status.pl") ) {
+ if ( open(STATUS, ">", "$LogDir/status.pl") ) {
print(STATUS $dump->Dump);
close(STATUS);
}
#
sub HostSortCompare
{
+    #
+    # sort() comparator (compares the hosts in $a and $b) used by
+    # QueueAllPCs to order hosts for queuing.
+    # Hosts with errors sort before hosts without errors.
+    #
    return -1 if ( $Status{$a}{error} ne "" && $Status{$b}{error} eq "" );
+
+    #
+    # (symmetric case: error-free hosts sort after hosts with errors)
+    #
+
    return 1 if ( $Status{$a}{error} eq "" && $Status{$b}{error} ne "" );
-    return $Status{$a}{endTime} <=> $Status{$b}{endTime};
+
+    #
+    # Hosts whose last good backup is older sort earlier (most overdue first)
+    #
+    my $r = $Status{$a}{lastGoodBackupTime} <=> $Status{$b}{lastGoodBackupTime};
+    return $r if ( $r );
+
+    #
+    # Finally, fall back to host name for a deterministic order
+    #
+    return $a cmp $b;
}
#
#
sub QueueAllPCs
{
+ my $nSkip = 0;
foreach my $host ( sort(HostSortCompare keys(%$Hosts)) ) {
delete($Status{$host}{backoffTime})
if ( defined($Status{$host}{backoffTime})
#
# this is a fixed ip host: queue it
#
- unshift(@BgQueue,
- {host => $host, user => "BackupPC", reqTime => time,
- dhcp => $Hosts->{$host}{dhcp}});
+ if ( $Info{DUlastValue} > $Conf{DfMaxUsagePct} ) {
+ #
+ # Since we are out of disk space, instead of queuing
+ # a regular job, queue an expire check instead. That
+ # way if the admin reduces the number of backups to
+ # keep then we will actually delete them. Otherwise
+ # BackupPC_dump will never run since we have exceeded
+ # the limit.
+ #
+ $nSkip++;
+ unshift(@BgQueue,
+ {host => $host, user => "BackupPC", reqTime => time,
+ dhcp => $Hosts->{$host}{dhcp}, dumpExpire => 1});
+ } else {
+ #
+ # Queue regular background backup
+ #
+ unshift(@BgQueue,
+ {host => $host, user => "BackupPC", reqTime => time,
+ dhcp => $Hosts->{$host}{dhcp}});
+ }
$BgQueueOn{$host} = 1;
}
}
+ if ( $nSkip ) {
+ print(LOG $bpc->timeStamp,
+ "Disk too full ($Info{DUlastValue}%); skipped $nSkip hosts\n");
+ $Info{DUDailySkipHostCnt} += $nSkip;
+ }
foreach my $dhcp ( @{$Conf{DHCPAddressRanges}} ) {
for ( my $i = $dhcp->{first} ; $i <= $dhcp->{last} ; $i++ ) {
my $ipAddr = "$dhcp->{ipAddrBase}.$i";
close(LOG);
LogFileOpen();
print(LOG "Fatal error: unhandled signal $SigName\n");
- unlink("$TopDir/log/BackupPC.pid");
+ unlink("$LogDir/BackupPC.pid");
confess("Got new signal $SigName... quitting\n");
} else {
$SigName = shift;
#
sub LogFileOpen
{
- mkpath("$TopDir/log", 0, 0777) if ( !-d "$TopDir/log" );
- open(LOG, ">>$TopDir/log/LOG")
- || die("Can't create LOG file $TopDir/log/LOG");
+ mkpath($LogDir, 0, 0777) if ( !-d $LogDir );
+ open(LOG, ">>$LogDir/LOG")
+ || die("Can't create LOG file $LogDir/LOG");
close(STDOUT);
close(STDERR);
open(STDOUT, ">&LOG");
print(LOG $bpc->timeStamp, "unix socket() failed: $!\n");
exit(1);
}
- my $sockFile = "$TopDir/log/BackupPC.sock";
+ my $sockFile = "$LogDir/BackupPC.sock";
unlink($sockFile);
if ( !bind(SERVER_UNIX, sockaddr_un($sockFile)) ) {
print(LOG $bpc->timeStamp, "unix bind() failed: $!\n");
}
%Jobs = ();
}
+ delete($Info{pid});
StatusWrite();
- unlink("$TopDir/log/BackupPC.pid");
+ unlink("$LogDir/BackupPC.pid");
exit(1);
}