2 #============================================================= -*-perl-*-
4 # BackupPC_compressPool: Compress existing pool
8 # Usage: BackupPC_compressPool [-t] [-r] <host>
11 # -t test mode: do everything except actually replace the pool files.
12 # Useful for estimating total run time without making any real
14 # -r read check: re-read the compressed file and compare it against
15 # the original uncompressed file. Can only be used in test mode.
16 # -c # number of children to fork. BackupPC_compressPool can take
17 # a long time to run, so to speed things up it spawns four children,
18 # each working on a different part of the pool. You can change
19 # the number of children with the -c option.
21 # BackupPC_compressPool is used to convert an uncompressed pool to
22 # a compressed pool. If BackupPC compression is enabled after
23 # uncompressed backups already exist, BackupPC_compressPool can
24 # be used to compress all the old uncompressed backups.
26 # It is important that BackupPC not run while BackupPC_compressPool
27 # runs. Also, BackupPC_compressPool must run to completion before
28 # BackupPC is restarted.
31 # Craig Barratt <cbarratt@users.sourceforge.net>
34 # Copyright (C) 2001-2003 Craig Barratt
36 # This program is free software; you can redistribute it and/or modify
37 # it under the terms of the GNU General Public License as published by
38 # the Free Software Foundation; either version 2 of the License, or
39 # (at your option) any later version.
41 # This program is distributed in the hope that it will be useful,
42 # but WITHOUT ANY WARRANTY; without even the implied warranty of
43 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 # GNU General Public License for more details.
46 # You should have received a copy of the GNU General Public License
47 # along with this program; if not, write to the Free Software
48 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
50 #========================================================================
52 # Version 2.1.0, released 20 Jun 2004.
54 # See http://backuppc.sourceforge.net.
56 #========================================================================
# Library path and module loading. NOTE(review): this excerpt appears to be
# sampled — lines such as `use strict;`, `use Getopt::Std;`, `use File::Find;`,
# `use File::Path;`, `use Compress::Zlib;` and `use BackupPC::Lib;` are
# referenced by later code but are not visible here; confirm against the
# original file.
65 use lib "__INSTALLDIR__/lib";
67 use BackupPC::FileZIO;
# Construct the main BackupPC library object; abort if it cannot be created
# (e.g. config cannot be read).
69 die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
# Cache frequently-used paths and configuration values.
71 my $TopDir = $bpc->TopDir();
72 my $BinDir = $bpc->BinDir();
73 my %Conf = $bpc->Conf();
# Uncompressed pool (source) and compressed pool (destination) directories.
74 my $PoolDir = "$TopDir/pool";
75 my $CPoolDir = "$TopDir/cpool";
# Target zlib compression level, taken from the BackupPC config.
76 my $Compress = $Conf{CompressLevel};
# Install one common handler for the signals that should trigger a clean
# shutdown. NOTE(review): the handler sub (presumably `catch_signal`, setting
# $SigName) is not visible in this excerpt; the closing brace of this foreach
# is also missing from the sampled lines.
81 # Catch various signals
83 foreach my $sig ( qw(INT BUS SEGV PIPE TERM ALRM HUP) ) {
84 $SIG{$sig} = \&catch_signal;
# Buffer-size tuning for the deflate loop in doCompress():
#   $CompMaxRead  - flush threshold for highly-compressible data
#   $CompMaxWrite - chunk size for sysread and spill-to-temp-file threshold
89 my $CompMaxRead = 131072; # 128K
90 my $CompMaxWrite = 6291456; # 6MB
# Parse command-line options: -t (test mode), -r (read check), -c N (number
# of child processes). No positional arguments are accepted.
# NOTE(review): the `exit` after the usage message is not visible in this
# excerpt (sampled lines).
92 if ( !getopts("trc:", \%opts) || @ARGV != 0 ) {
93 print("usage: $0 [-c nChild] [-r] [-t]\n");
96 my $TestMode = $opts{t};
97 my $ReadCheck = $opts{r};
# Default to 4 worker children when -c is not given.
98 my $nChild = $opts{c} || 4;
# -r only makes sense together with -t: the read check compares the
# compressed copy against the still-present original.
99 if ( $ReadCheck && !$TestMode ) {
100 print(STDERR "$0: -r (read check) option must have -t (test)\n")
103 if ( $nChild < 1 || $nChild >= 16 ) {
104 print(STDERR "$0: number of children (-c option) must be from 1 to 16\n");
# Verify Compress::Zlib is available before doing any work.
107 if ( !BackupPC::FileZIO->compOk ) {
109 $0: Compress::Zlib is not installed. You need to install it
110 before running this script.
# Compression must be enabled in the config, otherwise converting the pool
# makes no sense.
114 if ( $Compress <= 0 ) {
116 $0: compression is not enabled. \%Conf{CompressLevel} needs
117 to be set to a value from 1 to 9. Please edit the config.pl file and
# Global statistics accumulator (other counters such as $FileCnt, $FileOrigSz,
# $SubDirCnt, $SubDirDone, $Errors are referenced later but their declarations
# are not visible in this excerpt).
129 my $FileCompressSz = 0;
# Refuse to run while the BackupPC server is up: a successful connection
# means the daemon is running and the pool must not be modified.
131 my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
134 BackupPC is running on $Conf{ServerHost}. You need to stop BackupPC
135 before you can upgrade the code. Depending upon your installation,
136 you could run "/etc/init.d/backuppc stop".
# Create new files with the configured umask so pool permissions match.
141 umask($Conf{UmaskMode});
# --- interior of cpoolFileName() (sub header not visible in this excerpt) ---
# Maps a pool file path to its corresponding cpool path: expects the tail of
# the path to be x/y/z/<32-hex-digest> with an optional _N collision suffix.
146 if ( $new !~ m{/(\w/\w/\w)/(\w{32})(_\d+)?$} ) {
147 print("Error: Can't parse filename from $new\n");
# $1 is the 3-level hash subdirectory; ensure it exists in the cpool.
# NOTE(review): the assignment building $new from $CPoolDir/$1/$2 is not
# visible here (sampled lines) — $new appears to already hold the cpool path
# by the time the -f tests below run; confirm against the original file.
151 my $dir = "$CPoolDir/$1";
153 mkpath($dir, 0, 0777) if ( !-d $dir );
# If the base name is free, use it; otherwise probe _0, _1, ... until an
# unused collision-suffixed name is found.
154 return $new if ( !-f $new );
155 for ( my $i = 0 ; ; $i++ ) {
156 return "${new}_$i" if ( !-f "${new}_$i" );
# --- interior of doCompress() (sub header not visible in this excerpt) ---
# Called by File::Find for each pool file: compresses the file with zlib
# deflate, moves the result into the cpool, preserves atime/mtime, and
# reports per-subdirectory progress stats on stdout for the parent to parse.
# Untaint the filename delivered by File::Find (this script presumably runs
# under taint checks — confirm; same untaint idiom is used for atime/mtime).
162 my $file = ($File::Find::name =~ /(.*)/ && $1);
164 my(@s) = stat($file);
165 my($n, $dataIn, $dataOut, $flush, $copy);
# Bail out of the child promptly when a signal was caught.
168 print("Child got signal $SigName; quitting\n");
172 return if ( !-f $file );
# One deflate stream per file. NOTE(review): deflateInit's argument list
# (compression level etc.) is not visible in this excerpt.
173 my $defl = deflateInit(
# Open the original read/write (read-only in test mode) so small files can
# later be overwritten in place.
177 if ( !open(FH, $TestMode ? "<" : "+<", $file) ) {
178 print("Error: Can't open $file for read/write\n");
# Main compression loop: read up to $CompMaxWrite at a time and deflate.
183 while ( sysread(FH, $dataIn, $CompMaxWrite) > 0 ) {
185 $FileOrigSz += length($dataIn);
186 my $fragOut = $defl->deflate($dataIn);
187 if ( length($fragOut) < $CompMaxRead ) {
189 # Compression is too high: to avoid huge memory requirements
190 # on read we need to flush().
192 $fragOut .= $defl->flush();
199 $dataOut .= $fragOut;
# If compressed output outgrows $CompMaxWrite, spill it to a temporary
# file "$file.__z" instead of holding it all in memory ($copy flags that
# the spill path is active; its assignment is not visible here).
200 if ( !$copy && length($dataOut) > $CompMaxWrite ) {
201 if ( !open(OUT, "+>", "$file.__z") ) {
202 print("Error: Can't open $file.__z for write\n");
# Spill path: append the accumulated compressed data to the temp file.
210 if ( $copy && $dataOut ne "" ) {
211 if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
212 printf("Error: Can't write %d bytes to %s\n",
213 length($dataOut), "$file.__z");
220 $FileCompressSz += length($dataOut);
# End of input: flush the remaining deflate state and write it out.
225 $dataOut .= $defl->flush();
226 if ( $copy && $dataOut ne "" ) {
227 if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
228 printf("Error: Can't write %d bytes to %s\n",
229 length($dataOut), "$file.__z");
236 $FileCompressSz += length($dataOut);
# Pick the destination name in the cpool (handles digest collisions).
240 my $newFile = cpoolFileName($file);
# NOTE(review): the branch structure between the spill-file path and the
# in-memory path below is partly missing (sampled lines); the open of
# $newFile here appears to belong to the test-mode/spill handling.
243 if ( !open(FH, ">", $newFile) ) {
244 print("Error: Can't open $newFile for write\n");
# Spill path: rewind both handles and copy the temp file's compressed
# contents over the original file handle, then truncate to the final size
# (sysseek(OUT, 0, 1) reads back the current offset = bytes copied).
253 if ( !sysseek(OUT, 0, 0) ) {
254 print("Error: Can't seek $file.__z to 0\n");
257 if ( !sysseek(FH, 0, 0) ) {
258 print("Error: Can't seek $newFile to 0\n");
261 while ( sysread(OUT, $dataIn, $CompMaxWrite) > 0 ) {
262 if ( syswrite(FH, $dataIn) != length($dataIn) ) {
263 printf("Error: Can't write %d bytes to %s\n",
264 length($dataIn), $file);
268 if ( !truncate(FH, sysseek(OUT, 0, 1)) ) {
269 printf("Error: Can't truncate %s to %d\n",
270 $file, sysseek(OUT, 0, 1));
# In-memory path: overwrite the original in place with the (smaller)
# compressed data and truncate to its length.
277 if ( !sysseek(FH, 0, 0) ) {
278 print("Error: Can't seek $file to 0\n");
281 if ( syswrite(FH, $dataOut) != length($dataOut) ) {
282 printf("Error: Can't write %d bytes to %s\n",
283 length($dataOut), $file);
286 $FileCompressSz += length($dataOut);
287 if ( !truncate(FH, length($dataOut)) ) {
288 printf("Error: Can't truncate %s to %d\n", $file, length($dataOut));
# Optional -r verification: re-read the compressed file and compare it to
# the original (only reachable in test mode).
295 checkRead($file, $newFile);
# Move the now-compressed file to its cpool location and restore the
# original access/modification times (untainted via the regex idiom).
299 rename($file, $newFile);
300 my $atime = $s[8] =~ /(.*)/ && $1;
301 my $mtime = $s[9] =~ /(.*)/ && $1;
302 utime($atime, $mtime, $newFile);
# Per-subdirectory progress accounting: when we move into a new hash
# subdirectory, count the previous one as done and emit a stats line that
# the parent process parses (see the /^stats:/ regex in the parent loop).
304 (my $dir = $file) =~ s{/[^/]*$}{};
306 if ( $SubDirCurr ne "" && $SubDirCurr ne $dir ) {
310 } elsif ( $SubDirCurr eq "" ) {
317 print("stats: $SubDirDone $SubDirCnt $FileCnt $FileOrigSz"
318 . " $FileCompressSz $Errors\n");
# --- interior of checkRead() (sub header not visible in this excerpt) ---
# -r verification for pool files: re-read the compressed copy via FileZIO
# and byte-compare it against the uncompressed original.
323 my($file, $cfile) = @_;
324 return if ( !-f $file || !-f $cfile );
325 my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
326 || die("can't open $cfile for read\n");
327 my($n, $nd, $r, $d, $d0);
330 if ( !open(FH, "<", $file) ) {
331 print("can't open $file for check\n");
337 #print("comparing $file to $cfile\n");
# Compare in randomly-sized chunks so buffer-boundary bugs in the
# decompressor are more likely to be exposed.
339 $n = 1 + int(rand($CompMaxRead) + rand(100));
340 $r = $f->read(\$d, $n);
341 sysread(FH, $d0, $n);
# NOTE(review): the actual $d ne $d0 / $r mismatch test is not visible in
# this excerpt (sampled lines) — only its error message survives.
343 print("Botch read data on $cfile\n");
345 last if ( length($d) == 0 );
# After the data is exhausted, two further reads must both return 0 (EOF).
347 if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
348 printf("Botch at EOF on $cfile got $r (%d,%d)\n",
349 sysseek(FH, 0, 1), $n);
# --- interior of checkReadLine() (sub header not visible in this excerpt) ---
# -r verification for log files: same structure as checkRead() above, but
# presumably comparing line-by-line via FileZIO's readLine interface — the
# read loop itself is missing from this excerpt; confirm against the original.
358 my($file, $cfile) = @_;
359 return if ( !-f $file || !-f $cfile );
360 my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
361 || die("can't open $cfile for read\n");
362 my($n, $nd, $r, $d, $d0);
365 if ( !open(FH, "<", $file) ) {
366 print("can't open $file for check\n");
376 print("Botch read data on $cfile\n");
378 last if ( length($d) == 0 );
# EOF check: two extra reads must both return 0.
380 if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
381 printf("Botch at EOF on $cfile got $r (%d,%d)\n",
382 sysseek(FH, 0, 1), $n);
# Compress the per-host (or, with undef, the server-level) log files using
# FileZIO->compressCopy. For each uncompressed backup of the host, the
# Smb/Xfer logs are queued; LOG.N files are collected until the first gap.
# NOTE(review): the `my($host) = @_;` line and several closing braces are not
# visible in this excerpt (sampled lines).
394 sub compressHostFiles
397 my(@Files, @Backups, $fh, $data);
# No host given: compress the top-level server LOG.N files instead.
400 if ( !defined($host) ) {
401 for ( my $i = 0 ; ; $i++ ) {
402 last if ( !-f "$TopDir/log/LOG.$i" );
403 push(@Files, "$TopDir/log/LOG.$i");
# Per-host: queue transfer logs for every not-yet-compressed backup.
406 @Backups = $bpc->BackupInfoRead($host);
407 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
408 next if ( $Backups[$i]{compress} );
409 push(@Files, "$TopDir/pc/$host/SmbLOG.$Backups[$i]{num}");
410 push(@Files, "$TopDir/pc/$host/XferLOG.$Backups[$i]{num}");
412 push(@Files, "$TopDir/pc/$host/SmbLOG.bad");
413 push(@Files, "$TopDir/pc/$host/XferLOG.bad");
414 for ( my $i = 0 ; ; $i++ ) {
415 last if ( !-f "$TopDir/pc/$host/LOG.$i" );
416 push(@Files, "$TopDir/pc/$host/LOG.$i");
# Compress each existing file to "$file.z"; the original is removed only
# outside test mode (the !$TestMode flag to compressCopy).
419 foreach my $file ( @Files ) {
421 print("Child got signal $SigName; quitting\n");
425 next if ( !-f $file );
426 if ( !BackupPC::FileZIO->compressCopy($file, "$file.z", undef,
427 $Compress, !$TestMode) ) {
428 print("compressCopy($file, $file.z, $Compress, !$TestMode)"
# In test mode, optionally re-read and verify the compressed copy (-r).
431 } elsif ( $TestMode ) {
432 checkReadLine($file, "$file.z") if ( $ReadCheck );
# Mark every backup of the given host as compressed at level $Compress and
# write the updated backups file back. Run only after the pool itself has
# been compressed. NOTE(review): the `my($host) = @_;` line and closing braces
# are not visible in this excerpt (sampled lines).
438 sub updateHostBackupInfo
442 my @Backups = $bpc->BackupInfoRead($host);
443 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
444 $Backups[$i]{compress} = $Compress;
446 $bpc->BackupInfoWrite($host, @Backups);
# The 16 top-level pool hash subdirectories (0-f) and the full host list;
# both get partitioned across the worker children below.
450 my @Dirs = split(//, "0123456789abcdef");
451 my @Hosts = sort(keys(%{$bpc->HostInfoRead()}));
456 # First make sure there are no existing compressed backups
458 my(%compHosts, $compCnt);
# Scan every host's backups; any backup already marked compressed makes the
# conversion unsafe (mixed pool), so collect offenders and abort with a
# detailed message. NOTE(review): the lines incrementing $compCnt and
# recording %compHosts are not visible in this excerpt.
459 for ( my $j = 0 ; $j < @Hosts ; $j++ ) {
460 my $host = $Hosts[$j];
461 my @Backups = $bpc->BackupInfoRead($host);
462 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
463 next if ( !$Backups[$i]{compress} );
469 my $compHostStr = join("\n + ", sort(keys(%compHosts)));
471 BackupPC_compressPool: there are $compCnt compressed backups.
472 BackupPC_compressPool can only be run when there are no existing
473 compressed backups. The following hosts have compressed backups:
477 If you really want to run BackupPC_compressPool you will need to remove
478 all the existing compressed backups (and /home/pcbackup/data/cpool).
479 Think carefully before you do this. Otherwise, you can just let new
480 compressed backups run and the old uncompressed backups and pool will
487 # Next spawn $nChild children that actually do all the work.
489 for ( my $i = 0 ; $i < $nChild ; $i++ ) {
# open(CHILD, "-|") forks; the child's stdout becomes CHILD in the parent,
# which is how the "stats:" progress lines travel back.
492 if ( !defined($pid = open(CHILD, "-|")) ) {
493 print("Can't fork\n");
# Each child takes an even share of the remaining dirs and hosts; dividing
# by ($nChild - $i) distributes any remainder across the later children.
496 my $nDirs = @Dirs / ($nChild - $i);
497 my $nHosts = @Hosts / ($nChild - $i);
# --- child-only code from here until the parent comment below ---
# NOTE(review): the `if ( !$pid )` guard separating child from parent is
# not visible in this excerpt (sampled lines).
501 # First process each of the hosts (compress per-pc log files etc).
503 for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
504 compressHostFiles($Hosts[$j]);
507 # Count the total number of directories so we can estimate the
508 # completion time. We ignore empty directories by reading each
509 # directory and making sure it has at least 3 entries (ie, ".",
512 for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
513 my $thisDir = $Dirs[$j];
514 next if ( !-d "$PoolDir/$thisDir" );
515 foreach my $dir ( <$PoolDir/$thisDir/*/*> ) {
516 next if ( !opendir(DIR, $dir) );
517 my @files = readdir(DIR);
# > 2 entries means something besides "." and ".." is present.
519 $SubDirCnt++ if ( @files > 2 );
523 # Now process each of the directories
525 for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
526 my $thisDir = shift(@Dirs);
527 next if ( !-d "$PoolDir/$thisDir" );
# no_chdir keeps $File::Find::name as a full path for doCompress().
528 find({wanted => sub { doCompress($File::Find::name); },
529 no_chdir => 1}, "$PoolDir/$thisDir");
532 # Last, update the backup info file for each of the hosts
534 for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
535 updateHostBackupInfo($Hosts[$j]);
537 $SubDirDone = $SubDirCnt;
542 # This is the parent. Peel off $nDirs directories, $nHosts hosts,
# Record the child's pipe and pid, add its fd to the select() read mask,
# and remove this child's share from the remaining work lists.
545 $Jobs[$i]{fh} = *CHILD;
546 $Jobs[$i]{pid} = $pid;
547 vec($FDread, fileno($Jobs[$i]{fh}), 1) = 1;
548 splice(@Dirs, 0, $nDirs);
549 splice(@Hosts, 0, $nHosts);
558 # Now wait for all the children to report results and finish up
560 my $TimeStart = time;
563 while ( $FDread !~ /^\0*$/ ) {
565 select(my $rout = $FDread, undef, $ein, undef);
566 if ( $SigName ne $GotSignal ) {
567 print("Got signal $SigName; waiting for $nChild children to cleanup\n");
568 $GotSignal = $SigName;
570 for ( my $i = 0 ; $i < $nChild ; $i++ ) {
571 next if ( !vec($rout, fileno($Jobs[$i]{fh}), 1) );
573 if ( sysread($Jobs[$i]{fh}, $data, 1024) <= 0 ) {
574 vec($FDread, fileno($Jobs[$i]{fh}), 1) = 0;
575 close($Jobs[$i]{fh});
578 $Jobs[$i]{mesg} .= $data;
579 while ( $Jobs[$i]{mesg} =~ /(.*?)[\n\r]+(.*)/s ) {
581 $Jobs[$i]{mesg} = $2;
582 if ( $mesg =~ /^stats: (\d+) (\d+) (\d+) (\d+) (\d+) (\d+)/ ) {
583 $Jobs[$i]{SubDirDone} = $1;
584 $Jobs[$i]{SubDirCnt} = $2;
585 $Jobs[$i]{FileCnt} = $3;
586 $Jobs[$i]{FileOrigSz} = $4;
587 $Jobs[$i]{FileCompressSz} = $5;
588 $Jobs[$i]{Errors} = $6;
589 $SubDirDone = $SubDirCnt = $FileCnt = $FileOrigSz = 0;
590 $FileCompressSz = $Errors = 0;
592 for ( my $j = 0 ; $j < $nChild ; $j++ ) {
593 next if ( !defined($Jobs[$j]{SubDirDone}) );
594 $SubDirDone += $Jobs[$j]{SubDirDone};
595 $SubDirCnt += $Jobs[$j]{SubDirCnt};
596 $FileCnt += $Jobs[$j]{FileCnt};
597 $FileOrigSz += $Jobs[$j]{FileOrigSz};
598 $FileCompressSz += $Jobs[$j]{FileCompressSz};
599 $Errors += $Jobs[$j]{Errors};
604 my $pctDone = 100 * $SubDirDone / $SubDirCnt;
605 if ( $numReports == $nChild && $pctDone >= $DonePct + 1 ) {
606 $DonePct = int($pctDone);
607 my $estSecLeft = 1.2 * (time - $TimeStart)
608 * (100 / $pctDone - 1);
609 my $timeStamp = $bpc->timeStamp;
610 printf("%sDone %2.0f%% (%d of %d dirs, %d files,"
611 . " %.2fGB raw, %.1f%% reduce, %d errors)\n",
613 $pctDone, $SubDirDone, $SubDirCnt, $FileCnt,
614 $FileOrigSz / (1024 * 1024 * 1000),
615 100 * (1 - $FileCompressSz / $FileOrigSz));
616 printf("%s Est complete in %.1f hours (around %s)\n",
617 $timeStamp, $estSecLeft / 3600,
618 $bpc->timeStamp(time + $estSecLeft, 1))
619 if ( $DonePct < 100 );
628 print("Finished with $Errors errors!!!!\n");