2 #============================================================= -*-perl-*-
4 # BackupPC_compressPool: Compress existing pool
8 # Usage: BackupPC_compressPool [-t] [-r] <host>
11 # -t test mode: do everything except actually replace the pool files.
12 # Useful for estimating total run time without making any real changes.
14 # -r read check: re-read the compressed file and compare it against
15 # the original uncompressed file. Can only be used in test mode.
16 # -c # number of children to fork. BackupPC_compressPool can take
17 # a long time to run, so to speed things up it spawns four children,
18 # each working on a different part of the pool. You can change
19 # the number of children with the -c option.
21 # BackupPC_compressPool is used to convert an uncompressed pool to
22 # a compressed pool. If BackupPC compression is enabled after
23 # uncompressed backups already exist, BackupPC_compressPool can
24 # be used to compress all the old uncompressed backups.
26 # It is important that BackupPC not run while BackupPC_compressPool
27 # runs. Also, BackupPC_compressPool must run to completion before
28 # BackupPC is restarted.
31 # Craig Barratt <cbarratt@users.sourceforge.net>
34 # Copyright (C) 2001 Craig Barratt
36 # This program is free software; you can redistribute it and/or modify
37 # it under the terms of the GNU General Public License as published by
38 # the Free Software Foundation; either version 2 of the License, or
39 # (at your option) any later version.
41 # This program is distributed in the hope that it will be useful,
42 # but WITHOUT ANY WARRANTY; without even the implied warranty of
43 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 # GNU General Public License for more details.
46 # You should have received a copy of the GNU General Public License
47 # along with this program; if not, write to the Free Software
48 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
50 #========================================================================
52 # Version 2.0.0_CVS, released 3 Feb 2003.
54 # See http://backuppc.sourceforge.net.
56 #========================================================================
#---------------------------------------------------------------------------
# Setup: load the BackupPC libraries and derive global paths/config.
# NOTE(review): this is a non-contiguous excerpt; "use strict" and other
# use statements may exist outside this view -- confirm against the file.
#---------------------------------------------------------------------------
64 use lib "/usr/local/BackupPC/lib";
66 use BackupPC::FileZIO;
# Construct the main library object; abort if the installation is broken.
# NOTE(review): "my" inside a statement-modifier condition has subtle
# scoping in Perl; $bpc is used throughout the file below, so presumably
# it remains visible -- confirm.
68 die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
# Global paths and the server configuration (config.pl values).
70 my $TopDir = $bpc->TopDir();
71 my $BinDir = $bpc->BinDir();
72 my %Conf = $bpc->Conf();
# Source (uncompressed) and destination (compressed) pool directories.
73 my $PoolDir = "$TopDir/pool";
74 my $CPoolDir = "$TopDir/cpool";
# Zlib compression level (1-9) used for the converted pool.
75 my $Compress = $Conf{CompressLevel};
80 # Catch various signals
# Install one handler for all fatal-ish signals so children can report
# and quit cleanly.  NOTE(review): catch_signal is defined outside this
# excerpt -- confirm it exists.
82 foreach my $sig ( qw(INT BUS SEGV PIPE TERM ALRM HUP) ) {
83 $SIG{$sig} = \&catch_signal;
# Streaming limits used by the compress loop below: read input in chunks,
# and buffer at most ~6MB of compressed output in memory before spilling
# to a temporary file.
88 my $CompMaxRead = 131072; # 128K
89 my $CompMaxWrite = 6291456; # 6MB
# Command-line options: -t (test mode), -r (read check), -c n (children).
91 if ( !getopts("trc:", \%opts) || @ARGV != 0 ) {
92 print("usage: $0 [-c nChild] [-r] [-t]\n");
95 my $TestMode = $opts{t};
96 my $ReadCheck = $opts{r};
97 my $nChild = $opts{c} || 4;
# -r compares the compressed copy against the untouched original, so it
# is only meaningful together with -t.
98 if ( $ReadCheck && !$TestMode ) {
99 print(STDERR "$0: -r (read check) option must have -t (test)\n");
# NOTE(review): ">= 16" rejects -c 16 although the message says "from 1
# to 16" -- off-by-one; should likely be "> 16".
102 if ( $nChild < 1 || $nChild >= 16 ) {
103 print(STDERR "$0: number of children (-c option) must be from 1 to 16\n");
# Sanity checks: Compress::Zlib must be installed and CompressLevel > 0.
106 if ( !BackupPC::FileZIO->compOk ) {
108 $0: Compress::Zlib is not installed. You need to install it
109 before running this script.
113 if ( $Compress <= 0 ) {
115 $0: compression is not enabled. \%Conf{CompressLevel} needs
116 to be set to a value from 1 to 9. Please edit the config.pl file and
128 my $FileCompressSz = 0;
# Refuse to run while the BackupPC daemon is up: a successful
# ServerConnect means the server answered, so the pool must not be
# touched.  NOTE(review): the message below says "upgrade the code";
# it looks copied from an upgrade script -- should mention compression.
130 my $err = $bpc->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
133 BackupPC is running on $Conf{ServerHost}. You need to stop BackupPC
134 before you can upgrade the code. Depending upon your installation,
135 you could run "/etc/init.d/backuppc stop".
140 umask($Conf{UmaskMode});
#---------------------------------------------------------------------------
# Body of cpoolFileName($newPath) (sub header and "my($new) = @_;" fall
# outside this excerpt; it is called from the compress routine below):
# maps a pool file path to an unused file name under the compressed pool,
# creating the 3-level hash subdirectory if needed.
#---------------------------------------------------------------------------
145 if ( $new !~ m{/(\w/\w/\w)/(\w{32})(_\d+)?$} ) {
146 print("Error: Can't parse filename from $new\n");
# $1 is the x/y/z hash subdirectory; $2 is the 32-hex-digit pool digest.
150 my $dir = "$CPoolDir/$1";
152 mkpath($dir, 0, 0777) if ( !-d $dir );
153 return $new if ( !-f $new );
# Name collision in the cpool: append _0, _1, ... until a free name.
154 for ( my $i = 0 ; ; $i++ ) {
155 return "${new}_$i" if ( !-f "${new}_$i" );
#---------------------------------------------------------------------------
# Body of doCompress, the File::Find "wanted" callback (the sub header,
# else-branches and several closing braces fall outside this excerpt).
# Compresses one pool file: deflate in chunks; if the compressed output
# grows past $CompMaxWrite, spill it to "$file.__z" and copy back after;
# otherwise rewrite the file in place and truncate.  Finally the file is
# moved into the compressed pool via cpoolFileName() and its timestamps
# are restored.
#---------------------------------------------------------------------------
# Untaint the path handed in by File::Find (regex-capture idiom).
161 my $file = ($File::Find::name =~ /(.*)/ && $1);
# Remember the original stat() so atime/mtime can be restored at the end.
163 my(@s) = stat($file);
164 my($n, $dataIn, $dataOut, $flush, $copy);
# Bail out promptly if a signal was caught during the traversal.
167 print("Child got signal $SigName; quitting\n");
171 return if ( !-f $file );
172 my $defl = deflateInit(
# Test mode opens read-only; real mode needs read/write for in-place
# rewrite.  NOTE(review): FH/OUT are package-global bareword handles;
# safe only because each child processes files serially.
176 if ( !open(FH, $TestMode ? "<" : "+<", $file) ) {
177 print("Error: Can't open $file for read/write\n");
# Feed the uncompressed file through the deflator chunk by chunk.
181 while ( sysread(FH, $dataIn, $CompMaxWrite) > 0 ) {
183 $FileOrigSz += length($dataIn);
184 my $fragOut = $defl->deflate($dataIn);
185 if ( length($fragOut) < $CompMaxRead ) {
187 # Compression is too high: to avoid huge memory requirements
188 # on read we need to flush().
190 $fragOut .= $defl->flush();
197 $dataOut .= $fragOut;
# Compressed data no longer fits in memory: switch to "copy" mode and
# stream it into the temporary side file "$file.__z".
198 if ( !$copy && length($dataOut) > $CompMaxWrite ) {
199 if ( !open(OUT, "+>", "$file.__z") ) {
200 print("Error: Can't open $file.__z for write\n");
207 if ( $copy && $dataOut ne "" ) {
208 if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
209 printf("Error: Can't write %d bytes to %s\n",
210 length($dataOut), "$file.__z");
217 $FileCompressSz += length($dataOut);
# Input exhausted: flush the remaining compressed data.
222 $dataOut .= $defl->flush();
223 if ( $copy && $dataOut ne "" ) {
224 if ( syswrite(OUT, $dataOut) != length($dataOut) ) {
225 printf("Error: Can't write %d bytes to %s\n",
226 length($dataOut), "$file.__z");
233 $FileCompressSz += length($dataOut);
# Destination name in the compressed pool (handles digest collisions).
237 my $newFile = cpoolFileName($file);
# Presumably the test-mode branch writes to a separate file instead of
# replacing the original -- the branch structure here is partly outside
# this excerpt; confirm against the full file.
240 if ( !open(FH, ">", $newFile) ) {
241 print("Error: Can't open $newFile for write\n");
# Copy-mode finish: rewind both handles, then copy temp -> target.
249 if ( !sysseek(OUT, 0, 0) ) {
250 print("Error: Can't seek $file.__z to 0\n");
253 if ( !sysseek(FH, 0, 0) ) {
254 print("Error: Can't seek $newFile to 0\n");
257 while ( sysread(OUT, $dataIn, $CompMaxWrite) > 0 ) {
258 if ( syswrite(FH, $dataIn) != length($dataIn) ) {
259 printf("Error: Can't write %d bytes to %s\n",
260 length($dataIn), $file);
# Truncate to the exact compressed size (sysseek(..., 0, 1) returns the
# current file position, i.e. the number of bytes copied).
264 if ( !truncate(FH, sysseek(OUT, 0, 1)) ) {
265 printf("Error: Can't truncate %s to %d\n",
266 $file, sysseek(OUT, 0, 1));
# In-memory finish: overwrite the file with $dataOut and truncate.
273 if ( !sysseek(FH, 0, 0) ) {
274 print("Error: Can't seek $file to 0\n");
277 if ( syswrite(FH, $dataOut) != length($dataOut) ) {
278 printf("Error: Can't write %d bytes to %s\n",
279 length($dataOut), $file);
282 $FileCompressSz += length($dataOut);
283 if ( !truncate(FH, length($dataOut)) ) {
284 printf("Error: Can't truncate %s to %d\n", $file, length($dataOut));
# Optional -r verification: re-read the compressed file and compare.
291 checkRead($file, $newFile);
# Move the compressed file into the cpool and restore the original
# timestamps (values untainted via the regex-capture idiom).
295 rename($file, $newFile);
296 my $atime = $s[8] =~ /(.*)/ && $1;
297 my $mtime = $s[9] =~ /(.*)/ && $1;
298 utime($atime, $mtime, $newFile);
# Per-subdirectory progress accounting; reported to the parent on STDOUT
# as a parseable "stats:" line (consumed by the main select() loop).
300 (my $dir = $file) =~ s{/[^/]*$}{};
302 if ( $SubDirCurr ne "" && $SubDirCurr ne $dir ) {
306 } elsif ( $SubDirCurr eq "" ) {
313 print("stats: $SubDirDone $SubDirCnt $FileCnt $FileOrigSz"
314 . " $FileCompressSz $Errors\n");
#---------------------------------------------------------------------------
# Body of checkRead($file, $cfile) (sub header outside this excerpt;
# called from the compress routine when -t -r are given): re-reads the
# compressed $cfile via BackupPC::FileZIO in randomly sized chunks and
# byte-compares against the uncompressed $file, then checks EOF behavior.
#---------------------------------------------------------------------------
319 my($file, $cfile) = @_;
320 return if ( !-f $file || !-f $cfile );
321 my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
322 || die("can't open $cfile for read\n");
323 my($n, $nd, $r, $d, $d0);
326 if ( !open(FH, "<", $file) ) {
327 print("can't open $file for check\n");
332 #print("comparing $file to $cfile\n");
# Random chunk sizes exercise the decompressor's buffering edge cases.
334 $n = 1 + int(rand($CompMaxRead) + rand(100));
335 $r = $f->read(\$d, $n);
336 sysread(FH, $d0, $n);
338 print("Botch read data on $cfile\n");
340 last if ( length($d) == 0 );
# Two extra reads past EOF must both return 0.
342 if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
343 printf("Botch at EOF on $cfile got $r (%d,%d)\n",
344 sysseek(FH, 0, 1), $n);
#---------------------------------------------------------------------------
# Body of checkReadLine($file, $cfile) (sub header outside this excerpt;
# called from compressHostFiles when -t -r are given): same cross-check
# as checkRead, but for line-oriented log files -- presumably using
# readLine(); the middle of the compare loop falls outside this excerpt.
#---------------------------------------------------------------------------
353 my($file, $cfile) = @_;
354 return if ( !-f $file || !-f $cfile );
355 my $f = BackupPC::FileZIO->open($cfile, 0, $Compress)
356 || die("can't open $cfile for read\n");
357 my($n, $nd, $r, $d, $d0);
360 if ( !open(FH, "<", $file) ) {
361 print("can't open $file for check\n");
370 print("Botch read data on $cfile\n");
372 last if ( length($d) == 0 );
# Two extra reads past EOF must both return 0.
374 if ( ($r = $f->read(\$d, 100)) != 0 || ($r = $f->read(\$d, 100)) != 0 ) {
375 printf("Botch at EOF on $cfile got $r (%d,%d)\n",
376 sysseek(FH, 0, 1), $n);
#---------------------------------------------------------------------------
# compressHostFiles($host): compress log files with
# BackupPC::FileZIO->compressCopy.  With $host undef, the server-wide
# log/LOG.N files are handled; otherwise the per-host SmbLOG/XferLOG
# (only for backups not yet marked compressed), *.bad and LOG.N files.
# NOTE(review): "my($host) = @_;", the opening/closing braces and the
# error-branch bodies fall outside this excerpt.
#---------------------------------------------------------------------------
388 sub compressHostFiles
391 my(@Files, @Backups, $fh, $data);
# No host given: collect the top-level server LOG.0, LOG.1, ... files.
394 if ( !defined($host) ) {
395 for ( my $i = 0 ; ; $i++ ) {
396 last if ( !-f "$TopDir/log/LOG.$i" );
397 push(@Files, "$TopDir/log/LOG.$i");
# Host given: collect the per-backup and per-host log files.
400 @Backups = $bpc->BackupInfoRead($host);
401 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
402 next if ( $Backups[$i]{compress} );
403 push(@Files, "$TopDir/pc/$host/SmbLOG.$Backups[$i]{num}");
404 push(@Files, "$TopDir/pc/$host/XferLOG.$Backups[$i]{num}");
406 push(@Files, "$TopDir/pc/$host/SmbLOG.bad");
407 push(@Files, "$TopDir/pc/$host/XferLOG.bad");
408 for ( my $i = 0 ; ; $i++ ) {
409 last if ( !-f "$TopDir/pc/$host/LOG.$i" );
410 push(@Files, "$TopDir/pc/$host/LOG.$i");
# Compress each collected file to "$file.z"; the final argument to
# compressCopy tells it whether to remove the original (not in -t mode).
413 foreach my $file ( @Files ) {
415 print("Child got signal $SigName; quitting\n");
419 next if ( !-f $file );
420 if ( !BackupPC::FileZIO->compressCopy($file, "$file.z", undef,
421 $Compress, !$TestMode) ) {
422 print("compressCopy($file, $file.z, $Compress, !$TestMode)"
425 } elsif ( $TestMode ) {
426 checkReadLine($file, "$file.z") if ( $ReadCheck );
#---------------------------------------------------------------------------
# updateHostBackupInfo($host): after the pool has been converted, mark
# every backup of $host as compressed at level $Compress in the host's
# backups file.  NOTE(review): "my($host) = @_;" and the opening/closing
# braces fall outside this excerpt.
#---------------------------------------------------------------------------
432 sub updateHostBackupInfo
436 my @Backups = $bpc->BackupInfoRead($host);
437 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
438 $Backups[$i]{compress} = $Compress;
440 $bpc->BackupInfoWrite($host, @Backups);
#---------------------------------------------------------------------------
# Main program: the pool is partitioned by its 16 top-level hex
# directories and by host; each child later receives a share of both.
#---------------------------------------------------------------------------
444 my @Dirs = split(//, "0123456789abcdef");
445 my @Hosts = sort(keys(%{$bpc->HostInfoRead()}));
450 # First make sure there are no existing compressed backups
452 my(%compHosts, $compCnt);
453 for ( my $j = 0 ; $j < @Hosts ; $j++ ) {
454 my $host = $Hosts[$j];
455 my @Backups = $bpc->BackupInfoRead($host);
456 for ( my $i = 0 ; $i < @Backups ; $i++ ) {
457 next if ( !$Backups[$i]{compress} );
# Any pre-existing compressed backup makes the run unsafe: list the
# offending hosts and abort.  NOTE(review): the message below hardcodes
# /home/pcbackup/data/cpool; it should presumably use $CPoolDir.
463 my $compHostStr = join("\n + ", sort(keys(%compHosts)));
465 BackupPC_compressPool: there are $compCnt compressed backups.
466 BackupPC_compressPool can only be run when there are no existing
467 compressed backups. The following hosts have compressed backups:
471 If you really want to run BackupPC_compressPool you will need to remove
472 all the existing compressed backups (and /home/pcbackup/data/cpool).
473 Think carefully before you do this. Otherwise, you can just let new
474 compressed backups run and the old uncompressed backups and pool will
481 # Next spawn $nChild children that actually do all the work.
# Each child is forked with a read pipe back to the parent; it receives
# a contiguous share of the remaining dirs/hosts.  Dividing by
# ($nChild - $i) keeps the shares balanced as work is peeled off.
483 for ( my $i = 0 ; $i < $nChild ; $i++ ) {
486 if ( !defined($pid = open(CHILD, "-|")) ) {
487 print("Can't fork\n");
490 my $nDirs = @Dirs / ($nChild - $i);
491 my $nHosts = @Hosts / ($nChild - $i);
495 # First process each of the hosts (compress per-pc log files etc).
497 for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
498 compressHostFiles($Hosts[$j]);
501 # Count the total number of directories so we can estimate the
502 # completion time. We ignore empty directories by reading each
503 # directory and making sure it has at least 3 entries (ie, ".",
506 for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
507 my $thisDir = $Dirs[$j];
508 next if ( !-d "$PoolDir/$thisDir" );
509 foreach my $dir ( <$PoolDir/$thisDir/*/*> ) {
510 next if ( !opendir(DIR, $dir) );
511 my @files = readdir(DIR);
# A real subdir has at least "." and ".." plus one entry.
513 $SubDirCnt++ if ( @files > 2 );
517 # Now process each of the directories
519 for ( my $j = 0 ; $j < $nDirs ; $j++ ) {
520 my $thisDir = shift(@Dirs);
521 next if ( !-d "$PoolDir/$thisDir" );
522 find({wanted => sub { doCompress($File::Find::name); },
523 no_chdir => 1}, "$PoolDir/$thisDir");
526 # Last, update the backup info file for each of the hosts
528 for ( my $j = 0 ; $j < $nHosts ; $j++ ) {
529 updateHostBackupInfo($Hosts[$j]);
531 $SubDirDone = $SubDirCnt;
536 # This is the parent. Peel off $nDirs directories, $nHosts hosts,
# Parent bookkeeping: remember the child's pipe handle and pid, and set
# its fd bit in the select() read vector.  NOTE(review): assigning
# *CHILD appears intended to snapshot the current pipe's glob before
# CHILD is reopened for the next child -- confirm this idiom behaves
# with more than one child.
539 $Jobs[$i]{fh} = *CHILD;
540 $Jobs[$i]{pid} = $pid;
541 vec($FDread, fileno($Jobs[$i]{fh}), 1) = 1;
542 splice(@Dirs, 0, $nDirs);
543 splice(@Hosts, 0, $nHosts);
547 # compress the main log files (in the parents)
549 compressHostFiles(undef);
552 # Now wait for all the children to report results and finish up
554 my $TimeStart = time;
# select() loop: $FDread is an fd bit-vector (built with vec()); loop
# until every child's pipe has hit EOF and its bit has been cleared.
557 while ( $FDread !~ /^\0*$/ ) {
559 select(my $rout = $FDread, undef, $ein, undef);
560 if ( $SigName ne $GotSignal ) {
561 print("Got signal $SigName; waiting for $nChild children to cleanup\n");
562 $GotSignal = $SigName;
564 for ( my $i = 0 ; $i < $nChild ; $i++ ) {
565 next if ( !vec($rout, fileno($Jobs[$i]{fh}), 1) );
# EOF (or error) on this child's pipe: stop watching its fd.
567 if ( sysread($Jobs[$i]{fh}, $data, 1024) <= 0 ) {
568 vec($FDread, fileno($Jobs[$i]{fh}), 1) = 0;
569 close($Jobs[$i]{fh});
# Accumulate partial output and peel off complete lines; each child
# periodically emits a machine-readable "stats:" line (see doCompress).
572 $Jobs[$i]{mesg} .= $data;
573 while ( $Jobs[$i]{mesg} =~ /(.*?)[\n\r]+(.*)/s ) {
575 $Jobs[$i]{mesg} = $2;
576 if ( $mesg =~ /^stats: (\d+) (\d+) (\d+) (\d+) (\d+) (\d+)/ ) {
577 $Jobs[$i]{SubDirDone} = $1;
578 $Jobs[$i]{SubDirCnt} = $2;
579 $Jobs[$i]{FileCnt} = $3;
580 $Jobs[$i]{FileOrigSz} = $4;
581 $Jobs[$i]{FileCompressSz} = $5;
582 $Jobs[$i]{Errors} = $6;
# Re-total the global counters from every child's latest report.
583 $SubDirDone = $SubDirCnt = $FileCnt = $FileOrigSz = 0;
584 $FileCompressSz = $Errors = 0;
586 for ( my $j = 0 ; $j < $nChild ; $j++ ) {
587 next if ( !defined($Jobs[$j]{SubDirDone}) );
588 $SubDirDone += $Jobs[$j]{SubDirDone};
589 $SubDirCnt += $Jobs[$j]{SubDirCnt};
590 $FileCnt += $Jobs[$j]{FileCnt};
591 $FileOrigSz += $Jobs[$j]{FileOrigSz};
592 $FileCompressSz += $Jobs[$j]{FileCompressSz};
593 $Errors += $Jobs[$j]{Errors};
# Progress report once per whole percent, with a 1.2x-padded ETA.
# NOTE(review): divides by $SubDirCnt and $FileOrigSz -- an empty pool
# (zero totals) would die on division by zero here.
598 my $pctDone = 100 * $SubDirDone / $SubDirCnt;
599 if ( $numReports == $nChild && $pctDone >= $DonePct + 1 ) {
600 $DonePct = int($pctDone);
601 my $estSecLeft = 1.2 * (time - $TimeStart)
602 * (100 / $pctDone - 1);
603 my $timeStamp = $bpc->timeStamp;
604 printf("%sDone %2.0f%% (%d of %d dirs, %d files,"
605 . " %.2fGB raw, %.1f%% reduce, %d errors)\n",
607 $pctDone, $SubDirDone, $SubDirCnt, $FileCnt,
608 $FileOrigSz / (1024 * 1024 * 1000),
609 100 * (1 - $FileCompressSz / $FileOrigSz));
610 printf("%s Est complete in %.1f hours (around %s)\n",
611 $timeStamp, $estSecLeft / 3600,
612 $bpc->timeStamp(time + $estSecLeft, 1))
613 if ( $DonePct < 100 );
622 print("Finished with $Errors errors!!!!\n");