#!/usr/bin/perl
#============================================================= -*-perl-*-
#
# BackupPC_nightly: Nightly cleanup & statistics script.
#
# DESCRIPTION
#
#   BackupPC_nightly performs several administrative tasks:
#
#      - monthly aging of per-PC log files (only with -m option)
#
#      - pruning files from the pool that are no longer used (ie: those
#        with only one hard link).
#
#      - sending email to users and administrators (only with -m option)
#
#   Usage: BackupPC_nightly [-m] poolRangeStart poolRangeEnd
#
#   Flags:
#
#     -m   Do monthly aging of per-PC log files and send email.
#          Otherwise, BackupPC_nightly just does pool pruning.
#          Since several BackupPC_nightly processes might run
#          concurrently, just the first one is given the -m flag
#          by BackupPC.
#
#   The poolRangeStart and poolRangeEnd arguments are integers from 0 to 255.
#   These specify which parts of the pool to process.  There are 256 2nd-level
#   directories in the pool (0/0, 0/1, ..., f/e, f/f).  BackupPC_nightly
#   processes the given subset of this list (0 means 0/0, 255 means f/f).
#   Therefore, arguments of 0 255 process the entire pool, 0 127 does
#   the first half (ie: 0/0 through 7/f), 128 255 does the other half
#   (ie: 8/0 through f/f), and 0 15 does just the first 1/16 of the pool
#   (ie: 0/0 through 0/f).
#
# AUTHOR
#   Craig Barratt
#
# COPYRIGHT
#   Copyright (C) 2001-2009  Craig Barratt
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#========================================================================
#
# Version 3.2.0, released 31 Jul 2010.
#
# See http://backuppc.sourceforge.net.
#
#========================================================================

use strict;
no  utf8;
use lib "/usr/local/BackupPC/lib";
use BackupPC::Lib qw( :BPC_DT_ALL );
use BackupPC::FileZIO;
use Getopt::Std;
use File::Path;
use Data::Dumper;

die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
my %Conf   = $bpc->Conf();
my(%Status, %Info, %Jobs, @BgQueue, @UserQueue, @CmdQueue);

#
# We delete unused pool files (link count 1) in sorted inode
# order by gathering batches.  We delete the first half of
# each batch (ie: $PendingDeleteMax / 2 at a time).
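#
# For illustration (the inode number and path below are hypothetical):
# $bpc->find() pushes entries of the form
#
#     { inode => 123456, path => "$TopDir/cpool/0/0/..." }
#
# onto @PendingDelete.  Once more than $PendingDeleteMax entries are
# queued, processPendingDeletes(0) takes the first $PendingDeleteMax / 2
# of them, unlinks them in ascending inode order (skipping any whose
# link count is no longer 1), and leaves the rest queued; at the end of
# each pool directory processPendingDeletes(1) flushes whatever remains.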
#
my @PendingDelete;
my $PendingDeleteMax = 10240;

$bpc->ChildInit();

my %opts;
if ( !getopts("m", \%opts) || @ARGV != 2 ) {
    print("usage: $0 [-m] poolRangeStart poolRangeEnd\n");
    exit(1);
}
if ( $ARGV[0] !~ /^(\d+)$/ || $1 > 255 ) {
    print("$0: bad poolRangeStart '$ARGV[0]'\n");
    exit(1);
}
my $poolRangeStart = $1;
if ( $ARGV[1] !~ /^(\d+)$/ || $1 > 255 ) {
    print("$0: bad poolRangeEnd '$ARGV[1]'\n");
    exit(1);
}
my $poolRangeEnd = $1;

if ( $opts{m} ) {
    my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
    if ( $err ) {
        print("Can't connect to server ($err)\n");
        exit(1);
    }
    my $reply = $bpc->ServerMesg("status hosts");
    $reply = $1 if ( $reply =~ /(.*)/s );
    eval($reply);
}

###########################################################################
# Get statistics on the pool, and remove files that have only one link.
###########################################################################

my $fileCnt;       # total number of files
my $dirCnt;        # total number of directories
my $blkCnt;        # total block size of files
my $fileCntRm;     # total number of removed files
my $blkCntRm;      # total block size of removed files
my $blkCnt2;       # total block size of files with just 2 links
                   # (ie: files that only occur once among all backups)
my $fileCntRep;    # total number of file names containing "_", ie: files
                   # that have repeated md5 checksums
my $fileRepMax;    # worst case number of files that have repeated checksums
                   # (ie: max(nnn+1) for all names xxxxxxxxxxxxxxxx_nnn)
my $fileLinkMax;   # maximum number of hardlinks on a pool file
my $fileLinkTotal; # total number of hardlinks on entire pool
my $fileCntRename; # number of renamed files (to keep file numbering
                   # contiguous)
my %FixList;       # list of paths that need to be renamed to avoid
                   # new holes

my @hexChars = qw(0 1 2 3 4 5 6 7 8 9 a b c d e f);

for my $pool ( qw(pool cpool) ) {
    for ( my $i = $poolRangeStart ; $i <= $poolRangeEnd ; $i++ ) {
        my $dir = "$hexChars[int($i / 16)]/$hexChars[$i % 16]";
        # print("Doing $pool/$dir\n") if ( ($i % 16) == 0 );
        $fileCnt       = 0;
        $dirCnt        = 0;
        $blkCnt        = 0;
        $fileCntRm     = 0;
        $blkCntRm      = 0;
        $blkCnt2       = 0;
        $fileCntRep    = 0;
        $fileRepMax    = 0;
        $fileLinkMax   = 0;
        $fileCntRename = 0;
        %FixList       = ();
        $bpc->find({wanted => \&GetPoolStats}, "$TopDir/$pool/$dir")
                                        if ( -d "$TopDir/$pool/$dir" );
        my $kb   = $blkCnt / 2;
        my $kbRm = $blkCntRm / 2;
        my $kb2  = $blkCnt2 / 2;

        #
        # Main BackupPC_nightly counts the top-level directory
        #
        $dirCnt++ if ( $opts{m} && -d "$TopDir/$pool" && $i == 0 );

        #
        # Also count the next level directories
        #
        $dirCnt++ if ( ($i % 16) == 0
                        && -d "$TopDir/$pool/$hexChars[int($i / 16)]" );

        #
        # We need to process all pending deletes before we do the
        # renames
        #
        if ( @PendingDelete ) {
            sleep(1);
            processPendingDeletes(1);
        }

        #
        # Now make sure that files with repeated checksums are still
        # sequentially numbered
        #
        foreach my $name ( sort(keys(%FixList)) ) {
            my $rmCnt = $FixList{$name} + 1;
            my $new   = -1;
            for ( my $old = -1 ; ; $old++ ) {
                my $oldName = $name;
                $oldName .= "_$old" if ( $old >= 0 );
                if ( !-f $oldName ) {
                    #
                    # We know we are done when we have missed at least
                    # the number of files that were removed from this
                    # base name, plus a couple just to be sure
                    #
                    last if ( $rmCnt-- <= 0 );
                    next;
                }
                my $newName = $name;
                $newName .= "_$new" if ( $new >= 0 );
                $new++;
                next if ( $oldName eq $newName );
                rename($oldName, $newName);
                $fileCntRename++;
            }
        }
        print("BackupPC_stats $i = $pool,$fileCnt,$dirCnt,$kb,$kb2,$kbRm," .
              "$fileCntRm,$fileCntRep,$fileRepMax," .
              "$fileCntRename,$fileLinkMax,$fileLinkTotal\n");
    }
}

sleep(1);
processPendingDeletes(1);

###########################################################################
# Tell BackupPC that it is now ok to start running BackupPC_dump
# commands.  We are guaranteed that no BackupPC_link commands will
# run since only a single CmdQueue command runs at a time, and
# that means we are safe.  As of 3.x this is irrelevant since
# BackupPC_dump runs independently of BackupPC_nightly.
###########################################################################
printf("BackupPC_nightly lock_off\n");

###########################################################################
# Send email and generate backupInfo files for each backup
###########################################################################
if ( $opts{m} ) {
    print("log BackupPC_nightly now running BackupPC_sendEmail\n");
    system("$BinDir/BackupPC_sendEmail");
    doBackupInfoUpdate();
}

#
# Update the backupInfo files based on the backups file.
# We do this just once a week (on Sundays) since it is only
# needed for old backups with BackupPC <= 2.1.2.
#
sub doBackupInfoUpdate
{
    my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
    return if ( $wday != 0 );

    foreach my $host ( sort(keys(%{$bpc->HostInfoRead()})) ) {
        my @Backups = $bpc->BackupInfoRead($host);
        for ( my $i = 0 ; $i < @Backups ; $i++ ) {
            #
            # BackupPC::Storage->backupInfoWrite won't overwrite
            # an existing file
            #
            BackupPC::Storage->backupInfoWrite("$TopDir/pc/$host",
                                               $Backups[$i]{num},
                                               $Backups[$i]);
        }
    }
}

sub GetPoolStats
{
    my($file, $fullPath) = @_;
    my($inode, $nlinks, $nblocks) = (lstat($file))[1, 3, 12];

    if ( -d _ ) {
        $dirCnt++;
        return;
    } elsif ( ! -f _ ) {
        return;
    }
    if ( $nlinks == 1 ) {
        $blkCntRm += $nblocks;
        $fileCntRm++;

        #
        # Save the files for later batch deletion.
        #
        # This is so we can remove them in inode order, and additionally
        # reduce any remaining chance of a race condition between linking
        # to pool files and removing pool files.  (Other aspects of the
        # design should eliminate race conditions.)
        #
        push(@PendingDelete, { inode => $inode, path => $fullPath });
        if ( @PendingDelete > $PendingDeleteMax ) {
            processPendingDeletes(0);
        }

        #
        # We must keep repeated files numbered sequentially (ie: files
        # that have the same checksum are appended with _0, _1 etc).
        # There are two cases: we remove the base file xxxx but xxxx_0
        # exists, or we remove any file of the form xxxx_nnn.  We remember
        # the base name and fix it up later (not in the middle of find).
        #
        $fullPath =~ s/_\d+$//;
        $FixList{$fullPath}++;
    } else {
        if ( $file =~ /_(\d+)$/ ) {
            $fileRepMax = $1 + 1 if ( $fileRepMax <= $1 );
            $fileCntRep++;
        }
        $fileCnt += 1;
        $blkCnt  += $nblocks;
        $blkCnt2 += $nblocks if ( $nlinks == 2 );
        $fileLinkMax    = $nlinks if ( $fileLinkMax < $nlinks );
        $fileLinkTotal += $nlinks - 1;
    }
}

sub processPendingDeletes
{
    my($doAll) = @_;
    my @delete;

    if ( !$doAll ) {
        @delete = splice(@PendingDelete, 0, $PendingDeleteMax / 2);
    } else {
        @delete        = @PendingDelete;
        @PendingDelete = ();
    }
    for my $f ( sort({ $a->{inode} <=> $b->{inode} } @delete) ) {
        my($nlinks) = (lstat($f->{path}))[3];

        next if ( $nlinks != 1 );
        # print("Deleting $f->{path} ($f->{inode})\n");
        unlink($f->{path});
    }
}