lib/BackupPC/Lib.pm

   1 #============================================================= -*-perl-*-
   2 #
   3 # BackupPC::Lib package
   4 #
   5 # DESCRIPTION
   6 #
   7 #   This library defines a BackupPC::Lib class and a variety of utility
   8 #   functions used by BackupPC.
   9 #
  10 # AUTHOR
  11 #   Craig Barratt  <cbarratt@users.sourceforge.net>
  12 #
  13 # COPYRIGHT
  14 #   Copyright (C) 2001  Craig Barratt
  15 #
  16 #   This program is free software; you can redistribute it and/or modify
  17 #   it under the terms of the GNU General Public License as published by
  18 #   the Free Software Foundation; either version 2 of the License, or
  19 #   (at your option) any later version.
  20 #
  21 #   This program is distributed in the hope that it will be useful,
  22 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
  23 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  24 #   GNU General Public License for more details.
  25 #
  26 #   You should have received a copy of the GNU General Public License
  27 #   along with this program; if not, write to the Free Software
  28 #   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  29 #
  30 #========================================================================
  31 #
  32 # Version 2.0.2, released 6 Oct 2003.
  33 #
  34 # See http://backuppc.sourceforge.net.
  35 #
  36 #========================================================================
  37
  38 package BackupPC::Lib;
  39
  40 use strict;
  41
  42 use vars qw(%Conf %Lang);
  43 use Fcntl qw/:flock/;
  44 use Carp;
  45 use DirHandle ();
  46 use File::Path;
  47 use File::Compare;
  48 use Socket;
  49 use Cwd;
  50 use Digest::MD5;
  51
  52 sub new
  53 {
  54     my $class = shift;
  55     my($topDir, $installDir, $noUserCheck) = @_;
  56
  57     my $bpc = bless {
  58         TopDir  => $topDir || '/data/BackupPC2.0.2',
  59         BinDir  => $installDir || '/usr/local/BackupPC2.0.2',
  60         LibDir  => $installDir || '/usr/local/BackupPC2.0.2',
  61         Version => '2.0.2',
  62         BackupFields => [qw(
  63                     num type startTime endTime
  64                     nFiles size nFilesExist sizeExist nFilesNew sizeNew
  65                     xferErrs xferBadFile xferBadShare tarErrs
  66                     compress sizeExistComp sizeNewComp
  67                     noFill fillFromNum mangle xferMethod level
  68                 )],
  69         RestoreFields => [qw(
  70                     num startTime endTime result errorMsg nFiles size
  71                     tarCreateErrs xferErrs
  72                 )],
  73     }, $class;
  74     $bpc->{BinDir} .= "/bin";
  75     $bpc->{LibDir} .= "/lib";
  76     #
  77     # Clean up %ENV and setup other variables.
  78     #
  79     delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
  80     $bpc->{PoolDir}  = "$bpc->{TopDir}/pool";
  81     $bpc->{CPoolDir} = "$bpc->{TopDir}/cpool";
  82     if ( defined(my $error = $bpc->ConfigRead()) ) {
  83         print(STDERR $error, "\n");
  84         return;
  85     }
  86     #
  87     # Verify we are running as the correct user
  88     #
  89     if ( !$noUserCheck
  90             && $bpc->{Conf}{BackupPCUserVerify}
  91             && $> != (my $uid = (getpwnam($bpc->{Conf}{BackupPCUser}))[2]) ) {
  92         print("Wrong user: my userid is $>, instead of $uid"
  93             . " ($bpc->{Conf}{BackupPCUser})\n");
  94         return;
  95     }
  96     return $bpc;
  97 }
  98
  99 sub TopDir
 100 {
 101     my($bpc) = @_;
 102     return $bpc->{TopDir};
 103 }
 104
 105 sub BinDir
 106 {
 107     my($bpc) = @_;
 108     return $bpc->{BinDir};
 109 }
 110
 111 sub Version
 112 {
 113     my($bpc) = @_;
 114     return $bpc->{Version};
 115 }
 116
 117 sub Conf
 118 {
 119     my($bpc) = @_;
 120     return %{$bpc->{Conf}};
 121 }
 122
 123 sub Lang
 124 {
 125     my($bpc) = @_;
 126     return $bpc->{Lang};
 127 }
 128
 129 sub adminJob
 130 {
 131     return " admin ";
 132 }
 133
 134 sub trashJob
 135 {
 136     return " trashClean ";
 137 }
 138
 139 sub ConfValue
 140 {
 141     my($bpc, $param) = @_;
 142
 143     return $bpc->{Conf}{$param};
 144 }
 145
 146 sub verbose
 147 {
 148     my($bpc, $param) = @_;
 149
 150     $bpc->{verbose} = $param if ( defined($param) );
 151     return $bpc->{verbose};
 152 }
 153
 154 sub timeStamp
 155 {
 156     my($bpc, $t, $noPad) = @_;
 157     my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)
 158               = localtime($t || time);
 159     $year += 1900;
 160     $mon++;
 161     return "$year/$mon/$mday " . sprintf("%02d:%02d:%02d", $hour, $min, $sec)
 162             . ($noPad ? "" : " ");
 163 }
 164
 165 #
 166 # An ISO 8601-compliant version of timeStamp.  Needed by the
 167 # --newer-mtime argument to GNU tar in BackupPC::Xfer::Tar.
 168 # Also see http://www.w3.org/TR/NOTE-datetime.
 169 #
 170 sub timeStampISO
 171 {
 172     my($bpc, $t, $noPad) = @_;
 173     my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)
 174               = localtime($t || time);
 175     $year += 1900;
 176     $mon++;
 177     return sprintf("%04d-%02d-%02d ", $year, $mon, $mday)
 178          . sprintf("%02d:%02d:%02d", $hour, $min, $sec)
 179          . ($noPad ? "" : " ");
 180 }
 181
 182 sub BackupInfoRead
 183 {
 184     my($bpc, $host) = @_;
 185     local(*BK_INFO, *LOCK);
 186     my(@Backups);
 187
 188     flock(LOCK, LOCK_EX) if open(LOCK, "$bpc->{TopDir}/pc/$host/LOCK");
 189     if ( open(BK_INFO, "$bpc->{TopDir}/pc/$host/backups") ) {
 190         binmode(BK_INFO);
 191         while ( <BK_INFO> ) {
 192             s/[\n\r]+//;
 193             next if ( !/^(\d+\t(incr|full)[\d\t]*$)/ );
 194             $_ = $1;
 195             @{$Backups[@Backups]}{@{$bpc->{BackupFields}}} = split(/\t/);
 196         }
 197         close(BK_INFO);
 198     }
 199     close(LOCK);
 200     return @Backups;
 201 }
 202
 203 sub BackupInfoWrite
 204 {
 205     my($bpc, $host, @Backups) = @_;
 206     local(*BK_INFO, *LOCK);
 207     my($i);
 208
 209     flock(LOCK, LOCK_EX) if open(LOCK, "$bpc->{TopDir}/pc/$host/LOCK");
 210     unlink("$bpc->{TopDir}/pc/$host/backups.old")
 211                 if ( -f "$bpc->{TopDir}/pc/$host/backups.old" );
 212     rename("$bpc->{TopDir}/pc/$host/backups",
 213            "$bpc->{TopDir}/pc/$host/backups.old")
 214                 if ( -f "$bpc->{TopDir}/pc/$host/backups" );
 215     if ( open(BK_INFO, ">$bpc->{TopDir}/pc/$host/backups") ) {
 216         binmode(BK_INFO);
 217         for ( $i = 0 ; $i < @Backups ; $i++ ) {
 218             my %b = %{$Backups[$i]};
 219             printf(BK_INFO "%s\n", join("\t", @b{@{$bpc->{BackupFields}}}));
 220         }
 221         close(BK_INFO);
 222     }
 223     close(LOCK);
 224 }
 225
 226 sub RestoreInfoRead
 227 {
 228     my($bpc, $host) = @_;
 229     local(*RESTORE_INFO, *LOCK);
 230     my(@Restores);
 231
 232     flock(LOCK, LOCK_EX) if open(LOCK, "$bpc->{TopDir}/pc/$host/LOCK");
 233     if ( open(RESTORE_INFO, "$bpc->{TopDir}/pc/$host/restores") ) {
 234         binmode(RESTORE_INFO);
 235         while ( <RESTORE_INFO> ) {
 236             s/[\n\r]+//;
 237             next if ( !/^(\d+.*)/ );
 238             $_ = $1;
 239             @{$Restores[@Restores]}{@{$bpc->{RestoreFields}}} = split(/\t/);
 240         }
 241         close(RESTORE_INFO);
 242     }
 243     close(LOCK);
 244     return @Restores;
 245 }
 246
 247 sub RestoreInfoWrite
 248 {
 249     my($bpc, $host, @Restores) = @_;
 250     local(*RESTORE_INFO, *LOCK);
 251     my($i);
 252
 253     flock(LOCK, LOCK_EX) if open(LOCK, "$bpc->{TopDir}/pc/$host/LOCK");
 254     unlink("$bpc->{TopDir}/pc/$host/restores.old")
 255                 if ( -f "$bpc->{TopDir}/pc/$host/restores.old" );
 256     rename("$bpc->{TopDir}/pc/$host/restores",
 257            "$bpc->{TopDir}/pc/$host/restores.old")
 258                 if ( -f "$bpc->{TopDir}/pc/$host/restores" );
 259     if ( open(RESTORE_INFO, ">$bpc->{TopDir}/pc/$host/restores") ) {
 260         binmode(RESTORE_INFO);
 261         for ( $i = 0 ; $i < @Restores ; $i++ ) {
 262             my %b = %{$Restores[$i]};
 263             printf(RESTORE_INFO "%s\n",
 264                         join("\t", @b{@{$bpc->{RestoreFields}}}));
 265         }
 266         close(RESTORE_INFO);
 267     }
 268     close(LOCK);
 269 }
 270
 271 sub ConfigRead
 272 {
 273     my($bpc, $host) = @_;
 274     my($ret, $mesg, $config, @configs);
 275
 276     $bpc->{Conf} = ();
 277     push(@configs, "$bpc->{TopDir}/conf/config.pl");
 278     push(@configs, "$bpc->{TopDir}/conf/$host.pl")
 279             if ( $host ne "config" && -f "$bpc->{TopDir}/conf/$host.pl" );
 280     push(@configs, "$bpc->{TopDir}/pc/$host/config.pl")
 281             if ( defined($host) && -f "$bpc->{TopDir}/pc/$host/config.pl" );
 282     foreach $config ( @configs ) {
 283         %Conf = ();
 284         if ( !defined($ret = do $config) && ($! || $@) ) {
 285             $mesg = "Couldn't open $config: $!" if ( $! );
 286             $mesg = "Couldn't execute $config: $@" if ( $@ );
 287             $mesg =~ s/[\n\r]+//;
 288             return $mesg;
 289         }
 290         %{$bpc->{Conf}} = ( %{$bpc->{Conf} || {}}, %Conf );
 291     }
 292     return if ( !defined($bpc->{Conf}{Language}) );
 293     if ( defined($bpc->{Conf}{PerlModuleLoad}) ) {
 294         #
 295         # Load any user-specified perl modules.  This is for
 296         # optional user-defined extensions.
 297         #
 298         $bpc->{Conf}{PerlModuleLoad} = [$bpc->{Conf}{PerlModuleLoad}]
 299                     if ( ref($bpc->{Conf}{PerlModuleLoad}) ne "ARRAY" );
 300         foreach my $module ( @{$bpc->{Conf}{PerlModuleLoad}} ) {
 301             eval("use $module;");
 302         }
 303     }
 304     my $langFile = "$bpc->{LibDir}/BackupPC/Lang/$bpc->{Conf}{Language}.pm";
 305     if ( !defined($ret = do $langFile) && ($! || $@) ) {
 306         $mesg = "Couldn't open language file $langFile: $!" if ( $! );
 307         $mesg = "Couldn't execute language file $langFile: $@" if ( $@ );
 308         $mesg =~ s/[\n\r]+//;
 309         return $mesg;
 310     }
 311     $bpc->{Lang} = \%Lang;
 312     return;
 313 }
 314
 315 #
 316 # Return the mtime of the config file
 317 #
 318 sub ConfigMTime
 319 {
 320     my($bpc) = @_;
 321     return (stat("$bpc->{TopDir}/conf/config.pl"))[9];
 322 }
 323
 324 #
 325 # Returns information from the host file in $bpc->{TopDir}/conf/hosts.
 326 # With no argument a ref to a hash of hosts is returned.  Each
 327 # hash contains fields as specified in the hosts file.  With an
 328 # argument a ref to a single hash is returned with information
 329 # for just that host.
 330 #
 331 sub HostInfoRead
 332 {
 333     my($bpc, $host) = @_;
 334     my(%hosts, @hdr, @fld);
 335     local(*HOST_INFO);
 336
 337     if ( !open(HOST_INFO, "$bpc->{TopDir}/conf/hosts") ) {
 338         print(STDERR $bpc->timeStamp,
 339                      "Can't open $bpc->{TopDir}/conf/hosts\n");
 340         return {};
 341     }
 342     binmode(HOST_INFO);
 343     while ( <HOST_INFO> ) {
 344         s/[\n\r]+//;
 345         s/#.*//;
 346         s/\s+$//;
 347         next if ( /^\s*$/ || !/^([\w\.\\-]+\s+.*)/ );
 348         #
 349         # Split on white space, except if preceded by \
 350         # using zero-width negative look-behind assertion
 351         # (always wanted to use one of those).
 352         #
 353         @fld = split(/(?<!\\)\s+/, $1);
 354         #
 355         # Remove any \
 356         #
 357         foreach ( @fld ) {
 358             s{\\(\s)}{$1}g;
 359         }
 360         if ( @hdr ) {
 361             if ( defined($host) ) {
 362                 next if ( lc($fld[0]) ne $host );
 363                 @{$hosts{lc($fld[0])}}{@hdr} = @fld;
 364                 close(HOST_INFO);
 365                 return \%hosts;
 366             } else {
 367                 @{$hosts{lc($fld[0])}}{@hdr} = @fld;
 368             }
 369         } else {
 370             @hdr = @fld;
 371         }
 372     }
 373     close(HOST_INFO);
 374     return \%hosts;
 375 }
 376
 377 #
 378 # Return the mtime of the hosts file
 379 #
 380 sub HostsMTime
 381 {
 382     my($bpc) = @_;
 383     return (stat("$bpc->{TopDir}/conf/hosts"))[9];
 384 }
 385
 386 #
 387 # Stripped down from File::Path.  In particular we don't print
 388 # many warnings and we try three times to delete each directory
 389 # and file -- for some reason the original File::Path rmtree
 390 # didn't always completely remove a directory tree on the NetApp.
 391 #
 392 # Warning: this routine changes the cwd.
 393 #
 394 sub RmTreeQuiet
 395 {
 396     my($bpc, $pwd, $roots) = @_;
 397     my(@files, $root);
 398
 399     if ( defined($roots) && length($roots) ) {
 400       $roots = [$roots] unless ref $roots;
 401     } else {
 402       print "RmTreeQuiet: No root path(s) specified\n";
 403     }
 404     chdir($pwd);
 405     foreach $root (@{$roots}) {
 406         $root = $1 if ( $root =~ m{(.*?)/*$} );
 407         #
 408         # Try first to simply unlink the file: this avoids an
 409         # extra stat for every file.  If it fails (which it
 410         # will for directories), check if it is a directory and
 411         # then recurse.
 412         #
 413         if ( !unlink($root) ) {
 414             if ( -d $root ) {
 415                 my $d = DirHandle->new($root)
 416                   or print "Can't read $pwd/$root: $!";
 417                 @files = $d->read;
 418                 $d->close;
 419                 @files = grep $_!~/^\.{1,2}$/, @files;
 420                 $bpc->RmTreeQuiet("$pwd/$root", \@files);
 421                 chdir($pwd);
 422                 rmdir($root) || rmdir($root);
 423             } else {
 424                 unlink($root) || unlink($root);
 425             }
 426         }
 427     }
 428 }
 429
 430 #
 431 # Move a directory or file away for later deletion
 432 #
 433 sub RmTreeDefer
 434 {
 435     my($bpc, $trashDir, $file) = @_;
 436     my($i, $f);
 437
 438     return if ( !-e $file );
 439     mkpath($trashDir, 0, 0777) if ( !-d $trashDir );
 440     for ( $i = 0 ; $i < 1000 ; $i++ ) {
 441         $f = sprintf("%s/%d_%d_%d", $trashDir, time, $$, $i);
 442         next if ( -e $f );
 443         return if ( rename($file, $f) );
 444     }
 445     # shouldn't get here, but might if you tried to call this
 446     # across file systems.... just remove the tree right now.
 447     if ( $file =~ /(.*)\/([^\/]*)/ ) {
 448         my($d) = $1;
 449         my($f) = $2;
 450         my($cwd) = Cwd::fastcwd();
 451         $cwd = $1 if ( $cwd =~ /(.*)/ );
 452         $bpc->RmTreeQuiet($d, $f);
 453         chdir($cwd) if ( $cwd );
 454     }
 455 }
 456
 457 #
 458 # Empty the trash directory.  Returns 0 if it did nothing, 1 if it
 459 # did something, -1 if it failed to remove all the files.
 460 #
 461 sub RmTreeTrashEmpty
 462 {
 463     my($bpc, $trashDir) = @_;
 464     my(@files);
 465     my($cwd) = Cwd::fastcwd();
 466
 467     $cwd = $1 if ( $cwd =~ /(.*)/ );
 468     return if ( !-d $trashDir );
 469     my $d = DirHandle->new($trashDir) or carp "Can't read $trashDir: $!";
 470     @files = $d->read;
 471     $d->close;
 472     @files = grep $_!~/^\.{1,2}$/, @files;
 473     return 0 if ( !@files );
 474     $bpc->RmTreeQuiet($trashDir, \@files);
 475     foreach my $f ( @files ) {
 476         return -1 if ( -e $f );
 477     }
 478     chdir($cwd) if ( $cwd );
 479     return 1;
 480 }
 481
 482 #
 483 # Open a connection to the server.  Returns an error string on failure.
 484 # Returns undef on success.
 485 #
 486 sub ServerConnect
 487 {
 488     my($bpc, $host, $port, $justConnect) = @_;
 489     local(*FH);
 490
 491     return if ( defined($bpc->{ServerFD}) );
 492     #
 493     # First try the unix-domain socket
 494     #
 495     my $sockFile = "$bpc->{TopDir}/log/BackupPC.sock";
 496     socket(*FH, PF_UNIX, SOCK_STREAM, 0)     || return "unix socket: $!";
 497     if ( !connect(*FH, sockaddr_un($sockFile)) ) {
 498         my $err = "unix connect: $!";
 499         close(*FH);
 500         if ( $port > 0 ) {
 501             my $proto = getprotobyname('tcp');
 502             my $iaddr = inet_aton($host)     || return "unknown host $host";
 503             my $paddr = sockaddr_in($port, $iaddr);
 504
 505             socket(*FH, PF_INET, SOCK_STREAM, $proto)
 506                                              || return "inet socket: $!";
 507             connect(*FH, $paddr)             || return "inet connect: $!";
 508         } else {
 509             return $err;
 510         }
 511     }
 512     my($oldFH) = select(*FH); $| = 1; select($oldFH);
 513     $bpc->{ServerFD} = *FH;
 514     return if ( $justConnect );
 515     #
 516     # Read the seed that we need for our MD5 message digest.  See
 517     # ServerMesg below.
 518     #
 519     sysread($bpc->{ServerFD}, $bpc->{ServerSeed}, 1024);
 520     $bpc->{ServerMesgCnt} = 0;
 521     return;
 522 }
 523
 524 #
 525 # Check that the server connection is still ok
 526 #
 527 sub ServerOK
 528 {
 529     my($bpc) = @_;
 530
 531     return 0 if ( !defined($bpc->{ServerFD}) );
 532     vec(my $FDread, fileno($bpc->{ServerFD}), 1) = 1;
 533     my $ein = $FDread;
 534     return 0 if ( select(my $rout = $FDread, undef, $ein, 0.0) < 0 );
 535     return 1 if ( !vec($rout, fileno($bpc->{ServerFD}), 1) );
 536 }
 537
 538 #
 539 # Disconnect from the server
 540 #
 541 sub ServerDisconnect
 542 {
 543     my($bpc) = @_;
 544     return if ( !defined($bpc->{ServerFD}) );
 545     close($bpc->{ServerFD});
 546     delete($bpc->{ServerFD});
 547 }
 548
 549 #
 550 # Sends a message to the server and returns with the reply.
 551 #
 552 # To avoid possible attacks via the TCP socket interface, every client
 553 # message is protected by an MD5 digest. The MD5 digest includes four
 554 # items:
 555 #   - a seed that is sent to us when we first connect
 556 #   - a sequence number that increments for each message
 557 #   - a shared secret that is stored in $Conf{ServerMesgSecret}
 558 #   - the message itself.
 559 # The message is sent in plain text preceded by the MD5 digest. A
 560 # snooper can see the plain-text seed sent by BackupPC and plain-text
 561 # message, but cannot construct a valid MD5 digest since the secret in
 562 # $Conf{ServerMesgSecret} is unknown. A replay attack is not possible
 563 # since the seed changes on a per-connection and per-message basis.
 564 #
 565 sub ServerMesg
 566 {
 567     my($bpc, $mesg) = @_;
 568     return if ( !defined(my $fh = $bpc->{ServerFD}) );
 569     my $md5 = Digest::MD5->new;
 570     $md5->add($bpc->{ServerSeed} . $bpc->{ServerMesgCnt}
 571             . $bpc->{Conf}{ServerMesgSecret} . $mesg);
 572     print($fh $md5->b64digest . " $mesg\n");
 573     $bpc->{ServerMesgCnt}++;
 574     return <$fh>;
 575 }
 576
 577 #
 578 # Do initialization for child processes
 579 #
 580 sub ChildInit
 581 {
 582     my($bpc) = @_;
 583     close(STDERR);
 584     open(STDERR, ">&STDOUT");
 585     select(STDERR); $| = 1;
 586     select(STDOUT); $| = 1;
 587     $ENV{PATH} = $bpc->{Conf}{MyPath};
 588 }
 589
 590 #
 591 # Compute the MD5 digest of a file.  For efficiency we don't
 592 # use the whole file for big files:
 593 #   - for files <= 256K we use the file size and the whole file.
 594 #   - for files <= 1M we use the file size, the first 128K and
 595 #     the last 128K.
 596 #   - for files > 1M, we use the file size, the first 128K and
 597 #     the 8th 128K (ie: the 128K up to 1MB).
 598 # See the documentation for a discussion of the tradeoffs in
 599 # how much data we use and how many collisions we get.
 600 #
 601 # Returns the MD5 digest (a hex string) and the file size.
 602 #
 603 sub File2MD5
 604 {
 605     my($bpc, $md5, $name) = @_;
 606     my($data, $fileSize);
 607     local(*N);
 608
 609     $fileSize = (stat($name))[7];
 610     return ("", -1) if ( !-f _ );
 611     $name = $1 if ( $name =~ /(.*)/ );
 612     return ("", 0) if ( $fileSize == 0 );
 613     return ("", -1) if ( !open(N, $name) );
 614     binmode(N);
 615     $md5->reset();
 616     $md5->add($fileSize);
 617     if ( $fileSize > 262144 ) {
 618         #
 619         # read the first and last 131072 bytes of the file,
 620         # up to 1MB.
 621         #
 622         my $seekPosn = ($fileSize > 1048576 ? 1048576 : $fileSize) - 131072;
 623         $md5->add($data) if ( sysread(N, $data, 131072) );
 624         $md5->add($data) if ( sysseek(N, $seekPosn, 0)
 625                                 && sysread(N, $data, 131072) );
 626     } else {
 627         #
 628         # read the whole file
 629         #
 630         $md5->add($data) if ( sysread(N, $data, $fileSize) );
 631     }
 632     close(N);
 633     return ($md5->hexdigest, $fileSize);
 634 }
 635
 636 #
 637 # Compute the MD5 digest of a buffer (string).  For efficiency we don't
 638 # use the whole string for big strings:
 639 #   - for files <= 256K we use the file size and the whole file.
 640 #   - for files <= 1M we use the file size, the first 128K and
 641 #     the last 128K.
 642 #   - for files > 1M, we use the file size, the first 128K and
 643 #     the 8th 128K (ie: the 128K up to 1MB).
 644 # See the documentation for a discussion of the tradeoffs in
 645 # how much data we use and how many collisions we get.
 646 #
 647 # Returns the MD5 digest (a hex string).
 648 #
 649 sub Buffer2MD5
 650 {
 651     my($bpc, $md5, $fileSize, $dataRef) = @_;
 652
 653     $md5->reset();
 654     $md5->add($fileSize);
 655     if ( $fileSize > 262144 ) {
 656         #
 657         # add the first and last 131072 bytes of the string,
 658         # up to 1MB.
 659         #
 660         my $seekPosn = ($fileSize > 1048576 ? 1048576 : $fileSize) - 131072;
 661         $md5->add(substr($$dataRef, 0, 131072));
 662         $md5->add(substr($$dataRef, $seekPosn, 131072));
 663     } else {
 664         #
 665         # add the whole string
 666         #
 667         $md5->add($$dataRef);
 668     }
 669     return $md5->hexdigest;
 670 }
 671
 672 #
 673 # Given an MD5 digest $d and a compress flag, return the full
 674 # path in the pool.
 675 #
 676 sub MD52Path
 677 {
 678     my($bpc, $d, $compress, $poolDir) = @_;
 679
 680     return if ( $d !~ m{(.)(.)(.)(.*)} );
 681     $poolDir = ($compress ? $bpc->{CPoolDir} : $bpc->{PoolDir})
 682                     if ( !defined($poolDir) );
 683     return "$poolDir/$1/$2/$3/$1$2$3$4";
 684 }
 685
 686 #
 687 # For each file, check if the file exists in $bpc->{TopDir}/pool.
 688 # If so, remove the file and make a hardlink to the file in
 689 # the pool.  Otherwise, if the newFile flag is set, make a
 690 # hardlink in the pool to the new file.
 691 #
 692 # Returns 0 if a link should be made to a new file (ie: when the file
 693 #    is a new file but the newFile flag is 0).
 694 # Returns 1 if a link to an existing file is made,
 695 # Returns 2 if a link to a new file is made (only if $newFile is set)
 696 # Returns negative on error.
 697 #
 698 sub MakeFileLink
 699 {
 700     my($bpc, $name, $d, $newFile, $compress) = @_;
 701     my($i, $rawFile);
 702
 703     return -1 if ( !-f $name );
 704     for ( $i = -1 ; ; $i++ ) {
 705         return -2 if ( !defined($rawFile = $bpc->MD52Path($d, $compress)) );
 706         $rawFile .= "_$i" if ( $i >= 0 );
 707         if ( -f $rawFile ) {
 708             if ( (stat(_))[3] < $bpc->{Conf}{HardLinkMax}
 709                     && !compare($name, $rawFile) ) {
 710                 unlink($name);
 711                 return -3 if ( !link($rawFile, $name) );
 712                 return 1;
 713             }
 714         } elsif ( $newFile && -f $name && (stat($name))[3] == 1 ) {
 715             my($newDir);
 716             ($newDir = $rawFile) =~ s{(.*)/.*}{$1};
 717             mkpath($newDir, 0, 0777) if ( !-d $newDir );
 718             return -4 if ( !link($name, $rawFile) );
 719             return 2;
 720         } else {
 721             return 0;
 722         }
 723     }
 724 }
 725
 726 sub CheckHostAlive
 727 {
 728     my($bpc, $host) = @_;
 729     my($s, $pingCmd, $ret);
 730
 731     #
 732     # Return success if the ping cmd is undefined or empty.
 733     #
 734     if ( $bpc->{Conf}{PingCmd} eq "" ) {
 735         print(STDERR "CheckHostAlive: return ok because \$Conf{PingCmd}"
 736                    . " is empty\n") if ( $bpc->{verbose} );
 737         return 0;
 738     }
 739
 740     my $args = {
 741         pingPath => $bpc->{Conf}{PingPath},
 742         host     => $host,
 743     };
 744     $pingCmd = $bpc->cmdVarSubstitute($bpc->{Conf}{PingCmd}, $args);
 745
 746     #
 747     # Do a first ping in case the PC needs to wakeup
 748     #
 749     $s = $bpc->cmdSystemOrEval($pingCmd, undef, $args);
 750     if ( $? ) {
 751         print(STDERR "CheckHostAlive: first ping failed ($?, $!)\n")
 752                         if ( $bpc->{verbose} );
 753         return -1;
 754     }
 755
 756     #
 757     # Do a second ping and get the round-trip time in msec
 758     #
 759     $s = $bpc->cmdSystemOrEval($pingCmd, undef, $args);
 760     if ( $? ) {
 761         print(STDERR "CheckHostAlive: second ping failed ($?, $!)\n")
 762                         if ( $bpc->{verbose} );
 763         return -1;
 764     }
 765     if ( $s =~ /time=([\d\.]+)\s*ms/i ) {
 766         $ret = $1;
 767     } elsif ( $s =~ /time=([\d\.]+)\s*usec/i ) {
 768         $ret =  $1/1000;
 769     } else {
 770         print(STDERR "CheckHostAlive: can't extract round-trip time"
 771                    . " (not fatal)\n") if ( $bpc->{verbose} );
 772         $ret = 0;
 773     }
 774     print(STDERR "CheckHostAlive: returning $ret\n") if ( $bpc->{verbose} );
 775     return $ret;
 776 }
 777
 778 sub CheckFileSystemUsage
 779 {
 780     my($bpc) = @_;
 781     my($topDir) = $bpc->{TopDir};
 782     my($s, $dfCmd);
 783
 784     return 0 if ( $bpc->{Conf}{DfCmd} eq "" );
 785     my $args = {
 786         dfPath   => $bpc->{Conf}{DfPath},
 787         topDir   => $bpc->{TopDir},
 788     };
 789     $dfCmd = $bpc->cmdVarSubstitute($bpc->{Conf}{DfCmd}, $args);
 790     $s = $bpc->cmdSystemOrEval($dfCmd, undef, $args);
 791     return 0 if ( $? || $s !~ /(\d+)%/s );
 792     return $1;
 793 }
 794
 795 #
 796 # Given an IP address, return the host name and user name via
 797 # NetBios.
 798 #
 799 sub NetBiosInfoGet
 800 {
 801     my($bpc, $host) = @_;
 802     my($netBiosHostName, $netBiosUserName);
 803     my($s, $nmbCmd);
 804
 805     #
 806     # Skip NetBios check if NmbLookupCmd is emtpy
 807     #
 808     if ( $bpc->{Conf}{NmbLookupCmd} eq "" ) {
 809         print(STDERR "NetBiosInfoGet: return $host because \$Conf{NmbLookupCmd}"
 810                    . " is empty\n") if ( $bpc->{verbose} );
 811         return ($host, undef);
 812     }
 813
 814     my $args = {
 815         nmbLookupPath => $bpc->{Conf}{NmbLookupPath},
 816         host          => $host,
 817     };
 818     $nmbCmd = $bpc->cmdVarSubstitute($bpc->{Conf}{NmbLookupCmd}, $args);
 819     foreach ( split(/[\n\r]+/, $bpc->cmdSystemOrEval($nmbCmd, undef, $args)) ) {
 820         next if ( !/^\s*([\w\s-]+?)\s*<(\w{2})\> - .*<ACTIVE>/i );
 821         $netBiosHostName ||= $1 if ( $2 eq "00" );  # host is first 00
 822         $netBiosUserName   = $1 if ( $2 eq "03" );  # user is last 03
 823     }
 824     if ( !defined($netBiosHostName) ) {
 825         print(STDERR "NetBiosInfoGet: failed: can't parse return string\n")
 826                         if ( $bpc->{verbose} );
 827         return;
 828     }
 829     $netBiosHostName = lc($netBiosHostName);
 830     $netBiosUserName = lc($netBiosUserName);
 831     print(STDERR "NetBiosInfoGet: success, returning host $netBiosHostName,"
 832                . " user $netBiosUserName\n") if ( $bpc->{verbose} );
 833     return ($netBiosHostName, $netBiosUserName);
 834 }
 835
 836 #
 837 # Given a NetBios name lookup the IP address via NetBios.
 838 # In the case of a host returning multiple interfaces we
 839 # return the first IP address that matches the subnet mask.
 840 # If none match the subnet mask (or nmblookup doesn't print
 841 # the subnet mask) then just the first IP address is returned.
 842 #
 843 sub NetBiosHostIPFind
 844 {
 845     my($bpc, $host) = @_;
 846     my($netBiosHostName, $netBiosUserName);
 847     my($s, $nmbCmd, $subnet, $ipAddr, $firstIpAddr);
 848
 849     #
 850     # Skip NetBios lookup if NmbLookupFindHostCmd is emtpy
 851     #
 852     if ( $bpc->{Conf}{NmbLookupFindHostCmd} eq "" ) {
 853         print(STDERR "NetBiosHostIPFind: return $host because"
 854                    . " \$Conf{NmbLookupFindHostCmd} is empty\n")
 855                         if ( $bpc->{verbose} );
 856         return $host;
 857     }
 858
 859     my $args = {
 860         nmbLookupPath => $bpc->{Conf}{NmbLookupPath},
 861         host          => $host,
 862     };
 863     $nmbCmd = $bpc->cmdVarSubstitute($bpc->{Conf}{NmbLookupFindHostCmd}, $args);
 864     foreach my $resp ( split(/[\n\r]+/, $bpc->cmdSystemOrEval($nmbCmd, undef,
 865                                                               $args) ) ) {
 866         if ( $resp =~ /querying\s+\Q$host\E\s+on\s+(\d+\.\d+\.\d+\.\d+)/i ) {
 867             $subnet = $1;
 868             $subnet = $1 if ( $subnet =~ /^(.*?)(\.255)+$/ );
 869         } elsif ( $resp =~ /^\s*(\d+\.\d+\.\d+\.\d+)\s+\Q$host/ ) {
 870             my $ip = $1;
 871             $firstIpAddr = $ip if ( !defined($firstIpAddr) );
 872             $ipAddr      = $ip if ( !defined($ipAddr) && $ip =~ /^\Q$subnet/ );
 873         }
 874     }
 875     $ipAddr = $firstIpAddr if ( !defined($ipAddr) );
 876     if ( defined($ipAddr) ) {
 877         print(STDERR "NetBiosHostIPFind: found IP address $ipAddr for"
 878                    . " host $host\n") if ( $bpc->{verbose} );
 879         return $ipAddr;
 880     } else {
 881         print(STDERR "NetBiosHostIPFind: couldn't find IP address for"
 882                    . " host $host\n") if ( $bpc->{verbose} );
 883         return;
 884     }
 885 }
 886
 887 sub fileNameEltMangle
 888 {
 889     my($bpc, $name) = @_;
 890
 891     return "" if ( $name eq "" );
 892     $name =~ s{([%/\n\r])}{sprintf("%%%02x", ord($1))}eg;
 893     return "f$name";
 894 }
 895
 896 #
 897 # We store files with every name preceded by "f".  This
 898 # avoids possible name conflicts with other information
 899 # we store in the same directories (eg: attribute info).
 900 # The process of turning a normal path into one with each
 901 # node prefixed with "f" is called mangling.
 902 #
 903 sub fileNameMangle
 904 {
 905     my($bpc, $name) = @_;
 906
 907     $name =~ s{/([^/]+)}{"/" . $bpc->fileNameEltMangle($1)}eg;
 908     $name =~ s{^([^/]+)}{$bpc->fileNameEltMangle($1)}eg;
 909     return $name;
 910 }
 911
 912 #
 913 # This undoes FileNameMangle
 914 #
 915 sub fileNameUnmangle
 916 {
 917     my($bpc, $name) = @_;
 918
 919     $name =~ s{/f}{/}g;
 920     $name =~ s{^f}{};
 921     $name =~ s{%(..)}{chr(hex($1))}eg;
 922     return $name;
 923 }
 924
 925 #
 926 # Escape shell meta-characters with backslashes.
 927 # This should be applied to each argument seperately, not an
 928 # entire shell command.
 929 #
 930 sub shellEscape
 931 {
 932     my($bpc, $cmd) = @_;
 933
 934     $cmd =~ s/([][;&()<>{}|^\n\r\t *\$\\'"`?])/\\$1/g;
 935     return $cmd;
 936 }
 937
 938 #
 939 # For printing exec commands (which don't use a shell) so they look like
 940 # a valid shell command this function should be called with the exec
 941 # args.  The shell command string is returned.
 942 #
 943 sub execCmd2ShellCmd
 944 {
 945     my($bpc, @args) = @_;
 946     my $str;
 947
 948     foreach my $a ( @args ) {
 949         $str .= " " if ( $str ne "" );
 950         $str .= $bpc->shellEscape($a);
 951     }
 952     return $str;
 953 }
 954
 955 #
 956 # Do a URI-style escape to protect/encode special characters
 957 #
 958 sub uriEsc
 959 {
 960     my($bpc, $s) = @_;
 961     $s =~ s{([^\w.\/-])}{sprintf("%%%02X", ord($1));}eg;
 962     return $s;
 963 }
 964
 965 #
 966 # Do a URI-style unescape to restore special characters
 967 #
 968 sub uriUnesc
 969 {
 970     my($bpc, $s) = @_;
 971     $s =~ s{%(..)}{chr(hex($1))}eg;
 972     return $s;
 973 }
 974
 975 #
 976 # Do variable substitution prior to execution of a command.
 977 #
 978 sub cmdVarSubstitute
 979 {
 980     my($bpc, $template, $vars) = @_;
 981     my(@cmd);
 982
 983     #
 984     # Return without any substitution if the first entry starts with "&",
 985     # indicating this is perl code.
 986     #
 987     if ( (ref($template) eq "ARRAY" ? $template->[0] : $template) =~ /^\&/ ) {
 988         return $template;
 989     }
 990     if ( ref($template) ne "ARRAY" ) {
 991         #
 992         # Split at white space, except if escaped by \
 993         #
 994         $template = [split(/(?<!\\)\s+/, $template)];
 995         #
 996         # Remove the \ that escaped white space.
 997         #
 998         foreach ( @$template ) {
 999             s{\\(\s)}{$1}g;
1000         }
1001     }
1002     #
1003     # Merge variables into @tarClientCmd
1004     #
1005     foreach my $arg ( @$template ) {
1006         #
1007         # Replace scalar variables first
1008         #
1009         $arg =~ s{\$(\w+)(\+?)}{
1010             exists($vars->{$1}) && ref($vars->{$1}) ne "ARRAY"
1011                 ? ($2 eq "+" ? $bpc->shellEscape($vars->{$1}) : $vars->{$1})
1012                 : "\$$1$2"
1013         }eg;
1014         #
1015         # Now replicate any array arguments; this just works for just one
1016         # array var in each argument.
1017         #
1018         if ( $arg =~ m{(.*)\$(\w+)(\+?)(.*)} && ref($vars->{$2}) eq "ARRAY" ) {
1019             my $pre  = $1;
1020             my $var  = $2;
1021             my $esc  = $3;
1022             my $post = $4;
1023             foreach my $v ( @{$vars->{$var}} ) {
1024                 $v = $bpc->shellEscape($v) if ( $esc eq "+" );
1025                 push(@cmd, "$pre$v$post");
1026             }
1027         } else {
1028             push(@cmd, $arg);
1029         }
1030     }
1031     return \@cmd;
1032 }
1033
#
# Exec or eval a command.  $cmd is either a string or an array ref.
#
# If $cmd (or its first element) starts with "&" it is perl code: the
# elements are joined with spaces and the result is eval()ed.
# Otherwise $cmd is exec()ed; a string is first split into words at
# white space.
#
# @args are optional arguments for the eval() case; they are not used
# for exec().  The eval()ed code can refer to them as @args.
#
# This function does not return: if the eval() returns or the exec()
# fails a message is printed to STDERR and the process exits with
# status 1.
#
sub cmdExecOrEval
{
    my($bpc, $cmd, @args) = @_;

    if ( (ref($cmd) eq "ARRAY" ? $cmd->[0] : $cmd) =~ /^\&/ ) {
        #
        # Join the elements of the array, not the reference itself
        # (which would just give the string "ARRAY(0x...)").
        #
        $cmd = join(" ", @$cmd) if ( ref($cmd) eq "ARRAY" );
        print(STDERR "cmdExecOrEval: about to eval perl code $cmd\n")
                        if ( $bpc->{verbose} );
        eval($cmd);
        #
        # Report a compile or run-time failure instead of dropping it.
        #
        print(STDERR "cmdExecOrEval: eval of $cmd failed: $@\n") if ( $@ );
        print(STDERR "Perl code fragment for exec shouldn't return!!\n");
        exit(1);
    } else {
        #
        # split(' ') ignores leading white space, so a command string
        # that starts with a blank doesn't get an empty first word.
        #
        $cmd = [split(' ', $cmd)] if ( ref($cmd) ne "ARRAY" );
        print(STDERR "cmdExecOrEval: about to exec ",
                      $bpc->execCmd2ShellCmd(@$cmd), "\n")
                            if ( $bpc->{verbose} );
        #
        # Untaint each argument; /s lets "." match newlines so an
        # argument containing one isn't cut short.
        #
        exec(map { m/(.*)/s } @$cmd);
        print(STDERR "Exec failed for @$cmd\n");
        exit(1);
    }
}
1061
#
# System or eval a command.  $cmd is either a string or an array ref.
# $stdoutCB is a callback for output generated by the command.  If it
# is undef then output is returned.  If it is a code ref then the function
# is called with each piece of output as an argument.  If it is a scalar
# ref the output is appended to this variable.
#
# If $cmd (or its first element) starts with "&" it is perl code: the
# elements are joined with spaces, the result is eval()ed and the
# value of the eval() is the output.  Otherwise the command is run in
# a forked child whose STDERR is redirected to its STDOUT, and the
# parent reads the child's output a line at a time.
#
# @args are optional arguments for the eval() case; they are not used
# for system().  The eval()ed code can refer to them as @args.
#
# Also, $? should be set when the CHILD pipe is closed.  It is set
# to 1 if the fork fails.
#
sub cmdSystemOrEval
{
    my($bpc, $cmd, $stdoutCB, @args) = @_;
    my($pid, $out, $allOut);
    local(*CHILD);

    if ( (ref($cmd) eq "ARRAY" ? $cmd->[0] : $cmd) =~ /^\&/ ) {
        #
        # Join the elements of the array, not the reference itself
        # (which would just give the string "ARRAY(0x...)").
        #
        $cmd = join(" ", @$cmd) if ( ref($cmd) eq "ARRAY" );
        print(STDERR "cmdSystemOrEval: about to eval perl code $cmd\n")
                        if ( $bpc->{verbose} );
        $out = eval($cmd);
        #
        # Report a compile or run-time failure instead of dropping it.
        #
        print(STDERR "cmdSystemOrEval: eval of $cmd failed: $@\n") if ( $@ );
        $$stdoutCB .= $out if ( ref($stdoutCB) eq 'SCALAR' );
        &$stdoutCB($out)   if ( ref($stdoutCB) eq 'CODE' );
        print(STDERR "cmdSystemOrEval: finished: got output $out\n")
                        if ( $bpc->{verbose} );
        return $out        if ( !defined($stdoutCB) );
        return;
    } else {
        #
        # split(' ') ignores leading white space, so a command string
        # that starts with a blank doesn't get an empty first word.
        #
        $cmd = [split(' ', $cmd)] if ( ref($cmd) ne "ARRAY" );
        print(STDERR "cmdSystemOrEval: about to system ",
                      $bpc->execCmd2ShellCmd(@$cmd), "\n")
                        if ( $bpc->{verbose} );
        if ( !defined($pid = open(CHILD, "-|")) ) {
            my $err = "Can't fork to run @$cmd\n";
            $? = 1;
            $$stdoutCB .= $err if ( ref($stdoutCB) eq 'SCALAR' );
            &$stdoutCB($err)   if ( ref($stdoutCB) eq 'CODE' );
            return $err        if ( !defined($stdoutCB) );
            return;
        }
        binmode(CHILD);
        if ( !$pid ) {
            #
            # This is the child.  Send STDERR to the same place as
            # STDOUT so the parent sees both.
            #
            close(STDERR);
            open(STDERR, ">&STDOUT");
            #
            # Untaint each argument; /s lets "." match newlines so an
            # argument containing one isn't cut short.
            #
            exec(map { m/(.*)/s } @$cmd);
            print("Exec of @$cmd failed\n");
            exit(1);
        }
        #
        # The parent gathers the output from the child
        #
        while ( <CHILD> ) {
            $$stdoutCB .= $_ if ( ref($stdoutCB) eq 'SCALAR' );
            &$stdoutCB($_)   if ( ref($stdoutCB) eq 'CODE' );
            $out .= $_       if ( !defined($stdoutCB) );
            $allOut .= $_    if ( $bpc->{verbose} );
        }
        $? = 0;
        close(CHILD);
    }
    print(STDERR "cmdSystemOrEval: finished: got output $allOut\n")
                        if ( $bpc->{verbose} );
    return $out;
}
1131
1132 1;