#!/bin/perl #============================================================= -*-perl-*- # # BackupPC_compressPool: Compress existing pool # # DESCRIPTION # # Usage: BackupPC_compressPool [-t] [-r] # # Flags: # -t test mode: do everything except actually replace the pool files. # Useful for estimating total run time without making any real # changes. # -r read check: re-read the compressed file and compare it against # the original uncompressed file. Can only be used in test mode. # -c # number of children to fork. BackupPC_compressPool can take # a long time to run, so to speed things up it spawns four children, # each working on a different part of the pool. You can change # the number of children with the -c option. # # BackupPC_compressPool is used to convert an uncompressed pool to # a compressed pool. If BackupPC compression is enabled after # uncompressed backups already exist, BackupPC_compressPool can # be used to compress all the old uncompressed backups. # # It is important that BackupPC not run while BackupPC_compressPool # runs. Also, BackupPC_compressPool must run to completion before # BackupPC is restarted. # # AUTHOR # Craig Barratt # # COPYRIGHT # Copyright (C) 2001-2003 Craig Barratt # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # #======================================================================== # # Version 3.0.0, released 28 Jan 2007. # # See http://backuppc.sourceforge.net. # #======================================================================== use strict; no utf8; use File::Find; use File::Path; use Compress::Zlib; use Getopt::Std; use lib "/usr/local/BackupPC/lib"; use BackupPC::Lib; use BackupPC::FileZIO; die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) ); $bpc->ChildInit(); my $TopDir = $bpc->TopDir(); my $LogDir = $bpc->LogDir(); my $BinDir = $bpc->BinDir(); my %Conf = $bpc->Conf(); my $PoolDir = "$TopDir/pool"; my $CPoolDir = "$TopDir/cpool"; my $Compress = $Conf{CompressLevel}; my %opts; my $SigName = ""; # # Catch various signals # foreach my $sig ( qw(INT BUS SEGV PIPE TERM ALRM HUP) ) { $SIG{$sig} = \&catch_signal; } $| = 1; my $CompMaxRead = 131072; # 128K my $CompMaxWrite = 6291456; # 6MB if ( !getopts("trc:", \%opts) || @ARGV != 0 ) { print("usage: $0 [-c nChild] [-r] [-t]\n"); exit(1); } my $TestMode = $opts{t}; my $ReadCheck = $opts{r}; my $nChild = $opts{c} || 4; if ( $ReadCheck && !$TestMode ) { print(STDERR "$0: -r (read check) option must have -t (test)\n"); exit(1); } if ( $nChild < 1 || $nChild >= 16 ) { print(STDERR "$0: number of children (-c option) must be from 1 to 16\n"); exit(1); } if ( !BackupPC::FileZIO->compOk ) { print STDERR <ServerConnect($Conf{ServerHost}, $Conf{ServerPort}); if ( $err eq "" ) { print < 65536, -Level => $Compress, ); if ( !open(FH, $TestMode ? "<" : "+<", $file) ) { print("Error: Can't open $file for read/write\n"); $Errors++; return; } binmode(FH); while ( sysread(FH, $dataIn, $CompMaxWrite) > 0 ) { $flush = 0; $FileOrigSz += length($dataIn); my $fragOut = $defl->deflate($dataIn); if ( length($fragOut) < $CompMaxRead ) { # # Compression is too high: to avoid huge memory requirements # on read we need to flush(). # $fragOut .= $defl->flush(); $flush = 1; $defl = deflateInit( -Bufsize => 65536, -Level => $Compress, ); } $dataOut .= $fragOut; if ( !$copy && length($dataOut) > $CompMaxWrite ) { if ( !open(OUT, "+>", "$file.__z") ) { print("Error: Can't open $file.__z for write\n"); $Errors++; close(FH); return; } binmode(OUT); $copy = 1; } if ( $copy && $dataOut ne "" ) { if ( syswrite(OUT, $dataOut) != length($dataOut) ) { printf("Error: Can't write %d bytes to %s\n", length($dataOut), "$file.__z"); $Errors++; close(OUT); close(FH); unlink("$file.__z"); return; } $FileCompressSz += length($dataOut); $dataOut = undef; } } if ( !$flush ) { $dataOut .= $defl->flush(); if ( $copy && $dataOut ne "" ) { if ( syswrite(OUT, $dataOut) != length($dataOut) ) { printf("Error: Can't write %d bytes to %s\n", length($dataOut), "$file.__z"); $Errors++; close(OUT); close(FH); unlink("$file.__z"); return; } $FileCompressSz += length($dataOut); $dataOut = undef; } } my $newFile = cpoolFileName($file); if ( $TestMode ) { close(FH); if ( !open(FH, ">", $newFile) ) { print("Error: Can't open $newFile for write\n"); $Errors++; close(FH); unlink("$file.__z"); return; } binmode(FH); } if ( $copy ) { if ( !sysseek(OUT, 0, 0) ) { print("Error: Can't seek $file.__z to 0\n"); $Errors++; } if ( !sysseek(FH, 0, 0) ) { print("Error: Can't seek $newFile to 0\n"); $Errors++; } while ( sysread(OUT, $dataIn, $CompMaxWrite) > 0 ) { if ( syswrite(FH, $dataIn) != length($dataIn) ) { printf("Error: Can't write %d bytes to %s\n", length($dataIn), $file); $Errors++; } } if ( !truncate(FH, sysseek(OUT, 0, 1)) ) { printf("Error: Can't truncate %s to %d\n", $file, sysseek(OUT, 0, 1)); $Errors++; } close(OUT); close(FH); unlink("$file.__z"); } else { if ( !sysseek(FH, 0, 0) ) { print("Error: Can't seek $file to 0\n"); $Errors++; } if ( syswrite(FH, $dataOut) != length($dataOut) ) { printf("Error: Can't write %d bytes to %s\n", length($dataOut), $file); $Errors++; } $FileCompressSz += length($dataOut); if ( !truncate(FH, length($dataOut)) ) { printf("Error: Can't truncate %s to %d\n", $file, length($dataOut)); $Errors++; } close(FH); } if ( $TestMode ) { if ( $ReadCheck ) { checkRead($file, $newFile); } unlink($newFile); } else { rename($file, $newFile); my $atime = $s[8] =~ /(.*)/ && $1; my $mtime = $s[9] =~ /(.*)/ && $1; utime($atime, $mtime, $newFile); } (my $dir = $file) =~ s{/[^/]*$}{}; $FileCnt++; if ( $SubDirCurr ne "" && $SubDirCurr ne $dir ) { $SubDirDone++; $SubDirCurr = $dir; reportStats(); } elsif ( $SubDirCurr eq "" ) { $SubDirCurr = $dir; } } sub reportStats { print("stats: $SubDirDone $SubDirCnt $FileCnt $FileOrigSz" . " $FileCompressSz $Errors\n"); } sub checkRead { my($file, $cfile) = @_; return if ( !-f $file || !-f $cfile ); my $f = BackupPC::FileZIO->open($cfile, 0, $Compress) || die("can't open $cfile for read\n"); my($n, $nd, $r, $d, $d0); local(*FH); if ( !open(FH, "<", $file) ) { print("can't open $file for check\n"); $Errors++; $f->close(); return; } binmode(FH); #print("comparing $file to $cfile\n"); while ( 1 ) { $n = 1 + int(rand($CompMaxRead) + rand(100)); $r = $f->read(\$d, $n); sysread(FH, $d0, $n); if ( $d ne $d0 ) { print("Botch read data on $cfile\n"); } last if ( length($d) == 0 ); } if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) { printf("Botch at EOF on $cfile got $r (%d,%d)\n", sysseek(FH, 0, 1), $n); $Errors++; } $f->close; close(FH); } sub checkReadLine { my($file, $cfile) = @_; return if ( !-f $file || !-f $cfile ); my $f = BackupPC::FileZIO->open($cfile, 0, $Compress) || die("can't open $cfile for read\n"); my($n, $nd, $r, $d, $d0); local(*FH); if ( !open(FH, "<", $file) ) { print("can't open $file for check\n"); $Errors++; $f->close(); return; } binmode(FH); while ( 1 ) { $d0 = ; $d = $f->readLine(); if ( $d ne $d0 ) { print("Botch read data on $cfile\n"); } last if ( length($d) == 0 ); } if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) { printf("Botch at EOF on $cfile got $r (%d,%d)\n", sysseek(FH, 0, 1), $n); $Errors++; } $f->close; close(FH); } sub catch_signal { $SigName = shift; } sub compressHostFiles { my($host) = @_; my(@Files, @Backups, $fh, $data); local(*FH); if ( !defined($host) ) { for ( my $i = 0 ; ; $i++ ) { last if ( !-f "$LogDir/LOG.$i" ); push(@Files, "$LogDir/LOG.$i"); } } else { @Backups = $bpc->BackupInfoRead($host); for ( my $i = 0 ; $i < @Backups ; $i++ ) { next if ( $Backups[$i]{compress} ); push(@Files, "$TopDir/pc/$host/SmbLOG.$Backups[$i]{num}"); push(@Files, "$TopDir/pc/$host/XferLOG.$Backups[$i]{num}"); } push(@Files, "$TopDir/pc/$host/SmbLOG.bad"); push(@Files, "$TopDir/pc/$host/XferLOG.bad"); for ( my $i = 0 ; ; $i++ ) { last if ( !-f "$TopDir/pc/$host/LOG.$i" ); push(@Files, "$TopDir/pc/$host/LOG.$i"); } } foreach my $file ( @Files ) { if ( $SigName ) { print("Child got signal $SigName; quitting\n"); reportStats(); exit(0); } next if ( !-f $file ); if ( !BackupPC::FileZIO->compressCopy($file, "$file.z", undef, $Compress, !$TestMode) ) { print("compressCopy($file, $file.z, $Compress, !$TestMode)" . " failed\n"); $Errors++; } elsif ( $TestMode ) { checkReadLine($file, "$file.z") if ( $ReadCheck ); unlink("$file.z"); } } } sub updateHostBackupInfo { my($host) = @_; if ( !$TestMode ) { my @Backups = $bpc->BackupInfoRead($host); for ( my $i = 0 ; $i < @Backups ; $i++ ) { $Backups[$i]{compress} = $Compress; } $bpc->BackupInfoWrite($host, @Backups); } } my @Dirs = split(//, "0123456789abcdef"); my @Hosts = sort(keys(%{$bpc->HostInfoRead()})); my $FDread; my @Jobs; # # First make sure there are no existing compressed backups # my(%compHosts, $compCnt); for ( my $j = 0 ; $j < @Hosts ; $j++ ) { my $host = $Hosts[$j]; my @Backups = $bpc->BackupInfoRead($host); for ( my $i = 0 ; $i < @Backups ; $i++ ) { next if ( !$Backups[$i]{compress} ); $compHosts{$host}++; $compCnt++; } } if ( $compCnt ) { my $compHostStr = join("\n + ", sort(keys(%compHosts))); print STDERR < ) { next if ( !opendir(DIR, $dir) ); my @files = readdir(DIR); closedir(DIR); $SubDirCnt++ if ( @files > 2 ); } } # # Now process each of the directories # for ( my $j = 0 ; $j < $nDirs ; $j++ ) { my $thisDir = shift(@Dirs); next if ( !-d "$PoolDir/$thisDir" ); find({wanted => sub { doCompress($File::Find::name); }, no_chdir => 1}, "$PoolDir/$thisDir"); } # # Last, update the backup info file for each of the hosts # for ( my $j = 0 ; $j < $nHosts ; $j++ ) { updateHostBackupInfo($Hosts[$j]); } $SubDirDone = $SubDirCnt; reportStats(); exit(0); } # # This is the parent. Peel off $nDirs directories, $nHosts hosts, # and continue # $Jobs[$i]{fh} = *CHILD; $Jobs[$i]{pid} = $pid; vec($FDread, fileno($Jobs[$i]{fh}), 1) = 1; splice(@Dirs, 0, $nDirs); splice(@Hosts, 0, $nHosts); } # # compress the main log files (in the parents) # compressHostFiles(undef); # # Now wait for all the children to report results and finish up # my $TimeStart = time; my $DonePct = 0; my $GotSignal = ""; while ( $FDread !~ /^\0*$/ ) { my $ein = $FDread; select(my $rout = $FDread, undef, $ein, undef); if ( $SigName ne $GotSignal ) { print("Got signal $SigName; waiting for $nChild children to cleanup\n"); $GotSignal = $SigName; } for ( my $i = 0 ; $i < $nChild ; $i++ ) { next if ( !vec($rout, fileno($Jobs[$i]{fh}), 1) ); my $data; if ( sysread($Jobs[$i]{fh}, $data, 1024) <= 0 ) { vec($FDread, fileno($Jobs[$i]{fh}), 1) = 0; close($Jobs[$i]{fh}); next; } $Jobs[$i]{mesg} .= $data; while ( $Jobs[$i]{mesg} =~ /(.*?)[\n\r]+(.*)/s ) { my $mesg = $1; $Jobs[$i]{mesg} = $2; if ( $mesg =~ /^stats: (\d+) (\d+) (\d+) (\d+) (\d+) (\d+)/ ) { $Jobs[$i]{SubDirDone} = $1; $Jobs[$i]{SubDirCnt} = $2; $Jobs[$i]{FileCnt} = $3; $Jobs[$i]{FileOrigSz} = $4; $Jobs[$i]{FileCompressSz} = $5; $Jobs[$i]{Errors} = $6; $SubDirDone = $SubDirCnt = $FileCnt = $FileOrigSz = 0; $FileCompressSz = $Errors = 0; my $numReports = 0; for ( my $j = 0 ; $j < $nChild ; $j++ ) { next if ( !defined($Jobs[$j]{SubDirDone}) ); $SubDirDone += $Jobs[$j]{SubDirDone}; $SubDirCnt += $Jobs[$j]{SubDirCnt}; $FileCnt += $Jobs[$j]{FileCnt}; $FileOrigSz += $Jobs[$j]{FileOrigSz}; $FileCompressSz += $Jobs[$j]{FileCompressSz}; $Errors += $Jobs[$j]{Errors}; $numReports++; } $SubDirCnt ||= 1; $FileOrigSz ||= 1; my $pctDone = 100 * $SubDirDone / $SubDirCnt; if ( $numReports == $nChild && $pctDone >= $DonePct + 1 ) { $DonePct = int($pctDone); my $estSecLeft = 1.2 * (time - $TimeStart) * (100 / $pctDone - 1); my $timeStamp = $bpc->timeStamp; printf("%sDone %2.0f%% (%d of %d dirs, %d files," . " %.2fGB raw, %.1f%% reduce, %d errors)\n", $timeStamp, $pctDone, $SubDirDone, $SubDirCnt, $FileCnt, $FileOrigSz / (1024 * 1024 * 1000), 100 * (1 - $FileCompressSz / $FileOrigSz)); printf("%s Est complete in %.1f hours (around %s)\n", $timeStamp, $estSecLeft / 3600, $bpc->timeStamp(time + $estSecLeft, 1)) if ( $DonePct < 100 ); } } else { print($mesg, "\n"); } } } } if ( $Errors ) { print("Finished with $Errors errors!!!!\n"); exit(1); }