2 #============================================================= -*-perl-*-
4 # BackupPC_compressPool: Compress existing pool
8 # Usage: BackupPC_compressPool [-c nChild] [-r] [-t]
11 # -t test mode: do everything except actually replace the pool files.
12 # Useful for estimating total run time without making any real
14 # -r read check: re-read the compressed file and compare it against
15 # the original uncompressed file. Can only be used in test mode.
16 # -c # number of children to fork. BackupPC_compressPool can take
17 # a long time to run, so to speed things up it spawns four children,
18 # each working on a different part of the pool. You can change
19 # the number of children with the -c option.
21 # BackupPC_compressPool is used to convert an uncompressed pool to
22 # a compressed pool. If BackupPC compression is enabled after
23 # uncompressed backups already exist, BackupPC_compressPool can
24 # be used to compress all the old uncompressed backups.
26 # It is important that BackupPC not run while BackupPC_compressPool
27 # runs. Also, BackupPC_compressPool must run to completion before
28 # BackupPC is restarted.
31 # Craig Barratt <cbarratt@users.sourceforge.net>
34 # Copyright (C) 2001-2003 Craig Barratt
36 # This program is free software; you can redistribute it and/or modify
37 # it under the terms of the GNU General Public License as published by
38 # the Free Software Foundation; either version 2 of the License, or
39 # (at your option) any later version.
41 # This program is distributed in the hope that it will be useful,
42 # but WITHOUT ANY WARRANTY; without even the implied warranty of
43 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 # GNU General Public License for more details.
46 # You should have received a copy of the GNU General Public License
47 # along with this program; if not, write to the Free Software
48 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
50 #========================================================================
52 # Version 3.0.0, released 28 Jan 2007.
54 # See http://backuppc.sourceforge.net.
56 #========================================================================
65 use lib "/usr/local/BackupPC/lib";
67 use BackupPC::FileZIO;
# Create the BackupPC library object (the script dies if that fails), then
# cache the directories and configuration hash it reports.  $PoolDir is the
# tree walked for files to compress and $CPoolDir is where the compressed
# pool file names are built; both sit directly under $TopDir.  $Compress is
# the configured compression level, which must be > 0 for the script to run.
69 die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
71 my $TopDir = $bpc->TopDir();
72 my $LogDir = $bpc->LogDir();
73 my $BinDir = $bpc->BinDir();
74 my %Conf = $bpc->Conf();
75 my $PoolDir = "$TopDir/pool";
76 my $CPoolDir = "$TopDir/cpool";
77 my $Compress = $Conf{CompressLevel};
82 # Catch various signals
# Install catch_signal as the handler for each signal named in the list.
84 foreach my $sig ( qw(INT BUS SEGV PIPE TERM ALRM HUP) ) {
85 $SIG{$sig} = \&catch_signal;
# Size thresholds used by the pool compression loop.  When the deflate
# output for one input chunk is shorter than $CompMaxRead the stream is
# flushed.  $CompMaxWrite is the size of each read from the source file and
# also the amount of buffered output beyond which a "$file.__z" temporary
# file is opened.
90 my $CompMaxRead = 131072; # 128K
91 my $CompMaxWrite = 6291456; # 6MB
# Parse the command line.  -t (test mode) and -r (read check) are flags and
# -c takes the number of children; any positional argument is a usage error.
# -c falls back to 4 when it is absent or evaluates false.  -r is accepted
# only together with -t.  The child count must lie in 1..16 inclusive, which
# is what the error message states (16 is also the number of top-level pool
# directories shared out among the children), so the upper bound is tested
# with ">" rather than ">=".
93 if ( !getopts("trc:", \%opts) || @ARGV != 0 ) {
94 print("usage: $0 [-c nChild] [-r] [-t]\n");
97 my $TestMode = $opts{t};
98 my $ReadCheck = $opts{r};
99 my $nChild = $opts{c} || 4;
100 if ( $ReadCheck && !$TestMode ) {
101 print(STDERR "$0: -r (read check) option must have -t (test)\n");
104 if ( $nChild < 1 || $nChild > 16 ) {
105 print(STDERR "$0: number of children (-c option) must be from 1 to 16\n");
108 if ( !BackupPC::FileZIO->compOk ) {
110 $0: Compress::Zlib is not installed. You need to install it
111 before running this script.
# Refuse to run unless a positive compression level is configured.  The
# message names the setting as the hash element $Conf{CompressLevel}; the
# dollar sign is escaped so it is printed literally.
115 if ( $Compress <= 0 ) {
117 $0: compression is not enabled. \$Conf{CompressLevel} needs
118 to be set to a value from 1 to 9. Please edit the config.pl file and
130 my $FileCompressSz = 0;
132 my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
135 BackupPC is running on $Conf{ServerHost}. You need to stop BackupPC
136 before you can upgrade the code. Depending upon your installation,
137 you could run "/etc/init.d/backuppc stop".
142 umask($Conf{UmaskMode});
147 if ( $new !~ m{/(\w/\w/\w)/(\w{32})(_\d+)?$} ) {
148 print("Error: Can't parse filename from $new\n");
152 my $dir = "$CPoolDir/$1";
154 mkpath($dir, 0, 0777) if ( !-d $dir );
155 return $new if ( !-f $new );
156 for ( my $i = 0 ; ; $i++ ) {
157 return "${new}_$i" if ( !-f "${new}_$i" );
163 my $file = ($File::Find::name =~ /(.*)/ && $1);
165 my(@s) = stat($file);
166 my($n, $dataIn, $dataOut, $flush, $copy);
169 print("Child got signal $SigName; quitting\n");
173 return if ( !-f $file );
174 my $defl = deflateInit(
178 if ( !open(FH, $TestMode ? "<" : "+<", $file) ) {
179 print("Error: Can't open $file for read/write\n");
184 while ( sysread(FH, $dataIn, $CompMaxWrite) > 0 ) {
186 $FileOrigSz += length($dataIn);
187 my $fragOut = $defl->deflate($dataIn);
188 if ( length($fragOut) < $CompMaxRead ) {
190 # Compression is too high: to avoid huge memory requirements
191 # on read we need to flush().
193 $fragOut .= $defl->flush();
200 $dataOut .= $fragOut;
201 if ( !$copy && length($dataOut) > $CompMaxWrite ) {
202 if ( !open(OUT, "+>", "$file.__z") ) {
203 print("Error: Can't open $file.__z for write\n");
211 if ( $copy && $dataOut ne "" ) {
212 if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
213 printf("Error: Can't write %d bytes to %s\n",
214 length($dataOut), "$file.__z");
221 $FileCompressSz += length($dataOut);
226 $dataOut .= $defl->flush();
227 if ( $copy && $dataOut ne "" ) {
228 if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
229 printf("Error: Can't write %d bytes to %s\n",
230 length($dataOut), "$file.__z");
237 $FileCompressSz += length($dataOut);
241 my $newFile = cpoolFileName($file);
244 if ( !open(FH, ">", $newFile) ) {
245 print("Error: Can't open $newFile for write\n");
254 if ( !sysseek(OUT, 0, 0) ) {
255 print("Error: Can't seek $file.__z to 0\n");
258 if ( !sysseek(FH, 0, 0) ) {
259 print("Error: Can't seek $newFile to 0\n");
262 while ( sysread(OUT, $dataIn, $CompMaxWrite) > 0 ) {
263 if ( syswrite(FH, $dataIn) != length($dataIn) ) {
264 printf("Error: Can't write %d bytes to %s\n",
265 length($dataIn), $file);
269 if ( !truncate(FH, sysseek(OUT, 0, 1)) ) {
270 printf("Error: Can't truncate %s to %d\n",
271 $file, sysseek(OUT, 0, 1));
278 if ( !sysseek(FH, 0, 0) ) {
279 print("Error: Can't seek $file to 0\n");
282 if ( syswrite(FH, $dataOut) != length($dataOut) ) {
283 printf("Error: Can't write %d bytes to %s\n",
284 length($dataOut), $file);
287 $FileCompressSz += length($dataOut);
288 if ( !truncate(FH, length($dataOut)) ) {
289 printf("Error: Can't truncate %s to %d\n", $file, length($dataOut));
296 checkRead($file, $newFile);
300 rename($file, $newFile);
301 my $atime = $s[8] =~ /(.*)/ && $1;
302 my $mtime = $s[9] =~ /(.*)/ && $1;
303 utime($atime, $mtime, $newFile);
305 (my $dir = $file) =~ s{/[^/]*$}{};
307 if ( $SubDirCurr ne "" && $SubDirCurr ne $dir ) {
311 } elsif ( $SubDirCurr eq "" ) {
318 print("stats: $SubDirDone $SubDirCnt $FileCnt $FileOrigSz"
319 . " $FileCompressSz $Errors\n");
324 my($file, $cfile) = @_;
325 return if ( !-f $file || !-f $cfile );
326 my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
327 || die("can't open $cfile for read\n");
328 my($n, $nd, $r, $d, $d0);
331 if ( !open(FH, "<", $file) ) {
332 print("can't open $file for check\n");
338 #print("comparing $file to $cfile\n");
340 $n = 1 + int(rand($CompMaxRead) + rand(100));
341 $r = $f->read(\$d, $n);
342 sysread(FH, $d0, $n);
344 print("Botch read data on $cfile\n");
346 last if ( length($d) == 0 );
348 if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
349 printf("Botch at EOF on $cfile got $r (%d,%d)\n",
350 sysseek(FH, 0, 1), $n);
359 my($file, $cfile) = @_;
360 return if ( !-f $file || !-f $cfile );
361 my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
362 || die("can't open $cfile for read\n");
363 my($n, $nd, $r, $d, $d0);
366 if ( !open(FH, "<", $file) ) {
367 print("can't open $file for check\n");
377 print("Botch read data on $cfile\n");
379 last if ( length($d) == 0 );
381 if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
382 printf("Botch at EOF on $cfile got $r (%d,%d)\n",
383 sysseek(FH, 0, 1), $n);
#
# compressHostFiles: compress the log files of one host, or the server-wide
# log files when $host is undefined.
#
# Candidate files:
#   - $host undefined: $LogDir/LOG.0, LOG.1, ... up to the first index that
#     does not exist.
#   - $host defined: SmbLOG.<num> and XferLOG.<num> under $TopDir/pc/$host
#     for every backup whose {compress} field is false, then SmbLOG.bad and
#     XferLOG.bad, then LOG.0, LOG.1, ... up to the first missing index.
#
# Every candidate that exists as a plain file is handed to
# BackupPC::FileZIO->compressCopy with "$file.z" as the destination.  A
# failure is reported with print.  In test mode, when $ReadCheck is set,
# the result is verified with checkReadLine($file, "$file.z").
#
# NOTE(review): the last compressCopy argument is !$TestMode; presumably it
# controls whether the original file is replaced -- confirm against
# BackupPC::FileZIO.
#
395 sub compressHostFiles
398 my(@Files, @Backups, $fh, $data);
401 if ( !defined($host) ) {
402 for ( my $i = 0 ; ; $i++ ) {
403 last if ( !-f "$LogDir/LOG.$i" );
404 push(@Files, "$LogDir/LOG.$i");
407 @Backups = $bpc->BackupInfoRead($host);
408 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
409 next if ( $Backups[$i]{compress} );
410 push(@Files, "$TopDir/pc/$host/SmbLOG.$Backups[$i]{num}");
411 push(@Files, "$TopDir/pc/$host/XferLOG.$Backups[$i]{num}");
413 push(@Files, "$TopDir/pc/$host/SmbLOG.bad");
414 push(@Files, "$TopDir/pc/$host/XferLOG.bad");
415 for ( my $i = 0 ; ; $i++ ) {
416 last if ( !-f "$TopDir/pc/$host/LOG.$i" );
417 push(@Files, "$TopDir/pc/$host/LOG.$i");
420 foreach my $file ( @Files ) {
422 print("Child got signal $SigName; quitting\n");
426 next if ( !-f $file );
427 if ( !BackupPC::FileZIO->compressCopy($file, "$file.z", undef,
428 $Compress, !$TestMode) ) {
429 print("compressCopy($file, $file.z, $Compress, !$TestMode)"
432 } elsif ( $TestMode ) {
433 checkReadLine($file, "$file.z") if ( $ReadCheck );
#
# updateHostBackupInfo: record the new compression level for a host.
# Reads the host's backup list with $bpc->BackupInfoRead, sets the
# {compress} field of every entry to $Compress, and writes the list back
# with $bpc->BackupInfoWrite.
#
439 sub updateHostBackupInfo
443 my @Backups = $bpc->BackupInfoRead($host);
444 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
445 $Backups[$i]{compress} = $Compress;
447 $bpc->BackupInfoWrite($host, @Backups);
# The two work lists that are divided among the children: @Dirs holds the
# sixteen single-hex-digit directory names looked up under $PoolDir, and
# @Hosts holds the sorted host names returned by $bpc->HostInfoRead().
451 my @Dirs = split(//, "0123456789abcdef");
452 my @Hosts = sort(keys(%{$bpc->HostInfoRead()}));
457 # First make sure there are no existing compressed backups
459 my(%compHosts, $compCnt);
460 for ( my $j = 0 ; $j < @Hosts ; $j++ ) {
461 my $host = $Hosts[$j];
462 my @Backups = $bpc->BackupInfoRead($host);
463 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
464 next if ( !$Backups[$i]{compress} );
470 my $compHostStr = join("\n + ", sort(keys(%compHosts)));
472 BackupPC_compressPool: there are $compCnt compressed backups.
473 BackupPC_compressPool can only be run when there are no existing
474 compressed backups. The following hosts have compressed backups:
478 If you really want to run BackupPC_compressPool you will need to remove
479 all the existing compressed backups (and /home/pcbackup/data/cpool).
480 Think carefully before you do this. Otherwise, you can just let new
481 compressed backups run and the old uncompressed backups and pool will
488 # Next spawn $nChild children that actually do all the work.
490 for ( my $i = 0 ; $i < $nChild ; $i++ ) {
493 if ( !defined($pid = open(CHILD, "-|")) ) {
494 print("Can't fork\n");
# Give this child an equal share of the directories and hosts that are
# still unassigned.  The share is truncated to a whole number because the
# child iterates while $j < $nDirs (or $nHosts), which rounds a fractional
# share up, whereas the parent removes the share with splice, which rounds
# it down.  With a fractional share the next child would start on an entry
# this child is already processing.  The last child divides by 1 and so
# still receives everything that remains.
497 my $nDirs = int(@Dirs / ($nChild - $i));
498 my $nHosts = int(@Hosts / ($nChild - $i));
502 # First process each of the hosts (compress per-pc log files etc).
504 for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
505 compressHostFiles($Hosts[$j]);
508 # Count the total number of directories so we can estimate the
509 # completion time. We ignore empty directories by reading each
510 # directory and making sure it has at least 3 entries (ie, ".",
513 for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
514 my $thisDir = $Dirs[$j];
515 next if ( !-d "$PoolDir/$thisDir" );
516 foreach my $dir ( <$PoolDir/$thisDir/*/*> ) {
517 next if ( !opendir(DIR, $dir) );
518 my @files = readdir(DIR);
520 $SubDirCnt++ if ( @files > 2 );
524 # Now process each of the directories
526 for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
527 my $thisDir = shift(@Dirs);
528 next if ( !-d "$PoolDir/$thisDir" );
529 find({wanted => sub { doCompress($File::Find::name); },
530 no_chdir => 1}, "$PoolDir/$thisDir");
533 # Last, update the backup info file for each of the hosts
535 for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
536 updateHostBackupInfo($Hosts[$j]);
538 $SubDirDone = $SubDirCnt;
543 # This is the parent. Peel off $nDirs directories, $nHosts hosts,
546 $Jobs[$i]{fh} = *CHILD;
547 $Jobs[$i]{pid} = $pid;
548 vec($FDread, fileno($Jobs[$i]{fh}), 1) = 1;
549 splice(@Dirs, 0, $nDirs);
550 splice(@Hosts, 0, $nHosts);
554 # compress the main log files (in the parents)
556 compressHostFiles(undef);
559 # Now wait for all the children to report results and finish up
561 my $TimeStart = time;
# Multiplex the children's output.  $FDread is a bit vector with one bit per
# child pipe file descriptor; the loop runs until every bit is cleared.
# select() blocks until at least one pipe is readable.  A change in $SigName
# is announced once and remembered in $GotSignal.  For each readable pipe up
# to 1024 bytes are read: a result <= 0 clears that child's bit and closes
# its pipe, otherwise the data is appended to the child's {mesg} buffer and
# complete lines (terminated by one or more \n or \r) are peeled off the
# front, leaving any partial line buffered for the next read.
564 while ( $FDread !~ /^\0*$/ ) {
566 select(my $rout = $FDread, undef, $ein, undef);
567 if ( $SigName ne $GotSignal ) {
568 print("Got signal $SigName; waiting for $nChild children to cleanup\n");
569 $GotSignal = $SigName;
571 for ( my $i = 0 ; $i < $nChild ; $i++ ) {
572 next if ( !vec($rout, fileno($Jobs[$i]{fh}), 1) );
574 if ( sysread($Jobs[$i]{fh}, $data, 1024) <= 0 ) {
575 vec($FDread, fileno($Jobs[$i]{fh}), 1) = 0;
576 close($Jobs[$i]{fh});
579 $Jobs[$i]{mesg} .= $data;
580 while ( $Jobs[$i]{mesg} =~ /(.*?)[\n\r]+(.*)/s ) {
582 $Jobs[$i]{mesg} = $2;
# A "stats:" line from a child carries six integer counters in the order
# SubDirDone SubDirCnt FileCnt FileOrigSz FileCompressSz Errors, matching
# the order in which the child prints them.  The newest values replace that
# child's stored values, then the global totals are reset and recomputed as
# the sum over every child that has reported at least once.
583 if ( $mesg =~ /^stats: (\d+) (\d+) (\d+) (\d+) (\d+) (\d+)/ ) {
584 $Jobs[$i]{SubDirDone} = $1;
585 $Jobs[$i]{SubDirCnt} = $2;
586 $Jobs[$i]{FileCnt} = $3;
587 $Jobs[$i]{FileOrigSz} = $4;
588 $Jobs[$i]{FileCompressSz} = $5;
589 $Jobs[$i]{Errors} = $6;
590 $SubDirDone = $SubDirCnt = $FileCnt = $FileOrigSz = 0;
591 $FileCompressSz = $Errors = 0;
593 for ( my $j = 0 ; $j < $nChild ; $j++ ) {
594 next if ( !defined($Jobs[$j]{SubDirDone}) );
595 $SubDirDone += $Jobs[$j]{SubDirDone};
596 $SubDirCnt += $Jobs[$j]{SubDirCnt};
597 $FileCnt += $Jobs[$j]{FileCnt};
598 $FileOrigSz += $Jobs[$j]{FileOrigSz};
599 $FileCompressSz += $Jobs[$j]{FileCompressSz};
600 $Errors += $Jobs[$j]{Errors};
# Progress is the percentage of sub-directories finished.  A report is due
# only when $numReports equals $nChild and progress has advanced at least
# one full percentage point past the last reported value in $DonePct.  The
# remaining time is extrapolated linearly from the elapsed time and then
# padded by 20%.
# NOTE(review): $numReports is presumably the number of children that have
# sent stats; its update is not visible here -- confirm.
# NOTE(review): $SubDirCnt is used as a divisor; confirm it cannot be zero
# at this point.
605 my $pctDone = 100 * $SubDirDone / $SubDirCnt;
606 if ( $numReports == $nChild && $pctDone >= $DonePct + 1 ) {
607 $DonePct = int($pctDone);
608 my $estSecLeft = 1.2 * (time - $TimeStart)
609 * (100 / $pctDone - 1);
610 my $timeStamp = $bpc->timeStamp;
# Progress line.  The format ends with "%d errors", so the running error
# total $Errors is passed as the final argument; without it that conversion
# has no matching argument and the error count is never shown.
# NOTE(review): the "GB" divisor mixes 1024 and 1000 factors; left as is.
611 printf("%sDone %2.0f%% (%d of %d dirs, %d files,"
612 . " %.2fGB raw, %.1f%% reduce, %d errors)\n",
614 $pctDone, $SubDirDone, $SubDirCnt, $FileCnt,
615 $FileOrigSz / (1024 * 1024 * 1000),
616 100 * (1 - $FileCompressSz / $FileOrigSz), $Errors);
617 printf("%s Est complete in %.1f hours (around %s)\n",
618 $timeStamp, $estSecLeft / 3600,
619 $bpc->timeStamp(time + $estSecLeft, 1))
620 if ( $DonePct < 100 );
629 print("Finished with $Errors errors!!!!\n");