lib/BackupPC/Lib.pm

   1 #============================================================= -*-perl-*-
   2 #
   3 # BackupPC::Lib package
   4 #
   5 # DESCRIPTION
   6 #
   7 #   This library defines a BackupPC::Lib class and a variety of utility
   8 #   functions used by BackupPC.
   9 #
  10 # AUTHOR
  11 #   Craig Barratt  <cbarratt@users.sourceforge.net>
  12 #
  13 # COPYRIGHT
  14 #   Copyright (C) 2001  Craig Barratt
  15 #
  16 #   This program is free software; you can redistribute it and/or modify
  17 #   it under the terms of the GNU General Public License as published by
  18 #   the Free Software Foundation; either version 2 of the License, or
  19 #   (at your option) any later version.
  20 #
  21 #   This program is distributed in the hope that it will be useful,
  22 #   but WITHOUT ANY WARRANTY; without even the implied warranty of
  23 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  24 #   GNU General Public License for more details.
  25 #
  26 #   You should have received a copy of the GNU General Public License
  27 #   along with this program; if not, write to the Free Software
  28 #   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  29 #
  30 #========================================================================
  31 #
  32 # Version 2.0.0beta2, released 13 Apr 2003.
  33 #
  34 # See http://backuppc.sourceforge.net.
  35 #
  36 #========================================================================
  37
  38 package BackupPC::Lib;
  39
  40 use strict;
  41
  42 use vars qw(%Conf %Lang);
  43 use Fcntl qw/:flock/;
  44 use Carp;
  45 use DirHandle ();
  46 use File::Path;
  47 use File::Compare;
  48 use Socket;
  49 use Cwd;
  50 use Digest::MD5;
  51
  52 sub new
  53 {
  54     my $class = shift;
  55     my($topDir, $installDir, $noUserCheck) = @_;
  56
  57     my $bpc = bless {
  58         TopDir  => $topDir || '/data/BackupPC',
  59         BinDir  => $installDir || '/usr/local/BackupPC',
  60         LibDir  => $installDir || '/usr/local/BackupPC',
  61         Version => '2.0.0beta2',
  62         BackupFields => [qw(
  63                     num type startTime endTime
  64                     nFiles size nFilesExist sizeExist nFilesNew sizeNew
  65                     xferErrs xferBadFile xferBadShare tarErrs
  66                     compress sizeExistComp sizeNewComp
  67                     noFill fillFromNum mangle xferMethod level
  68                 )],
  69         RestoreFields => [qw(
  70                     num startTime endTime result errorMsg nFiles size
  71                     tarCreateErrs xferErrs
  72                 )],
  73     }, $class;
  74     $bpc->{BinDir} .= "/bin";
  75     $bpc->{LibDir} .= "/lib";
  76     #
  77     # Clean up %ENV and setup other variables.
  78     #
  79     delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
  80     $bpc->{PoolDir}  = "$bpc->{TopDir}/pool";
  81     $bpc->{CPoolDir} = "$bpc->{TopDir}/cpool";
  82     if ( defined(my $error = $bpc->ConfigRead()) ) {
  83         print(STDERR $error, "\n");
  84         return;
  85     }
  86     #
  87     # Verify we are running as the correct user
  88     #
  89     if ( !$noUserCheck
  90             && $bpc->{Conf}{BackupPCUserVerify}
  91             && $> != (my $uid = (getpwnam($bpc->{Conf}{BackupPCUser}))[2]) ) {
  92         print("Wrong user: my userid is $>, instead of $uid"
  93             . " ($bpc->{Conf}{BackupPCUser})\n");
  94         return;
  95     }
  96     return $bpc;
  97 }
  98
  99 sub TopDir
 100 {
 101     my($bpc) = @_;
 102     return $bpc->{TopDir};
 103 }
 104
 105 sub BinDir
 106 {
 107     my($bpc) = @_;
 108     return $bpc->{BinDir};
 109 }
 110
 111 sub Version
 112 {
 113     my($bpc) = @_;
 114     return $bpc->{Version};
 115 }
 116
 117 sub Conf
 118 {
 119     my($bpc) = @_;
 120     return %{$bpc->{Conf}};
 121 }
 122
 123 sub Lang
 124 {
 125     my($bpc) = @_;
 126     return $bpc->{Lang};
 127 }
 128
 129 sub adminJob
 130 {
 131     return " admin ";
 132 }
 133
 134 sub trashJob
 135 {
 136     return " trashClean ";
 137 }
 138
 139 sub ConfValue
 140 {
 141     my($bpc, $param) = @_;
 142
 143     return $bpc->{Conf}{$param};
 144 }
 145
 146 sub verbose
 147 {
 148     my($bpc, $param) = @_;
 149
 150     $bpc->{verbose} = $param if ( defined($param) );
 151     return $bpc->{verbose};
 152 }
 153
 154 sub timeStamp
 155 {
 156     my($bpc, $t, $noPad) = @_;
 157     my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)
 158               = localtime($t || time);
 159     $year += 1900;
 160     $mon++;
 161     return "$year/$mon/$mday " . sprintf("%02d:%02d:%02d", $hour, $min, $sec)
 162             . ($noPad ? "" : " ");
 163 }
 164
 165 #
 166 # An ISO 8601-compliant version of timeStamp.  Needed by the
 167 # --newer-mtime argument to GNU tar in BackupPC::Xfer::Tar.
 168 # Also see http://www.w3.org/TR/NOTE-datetime.
 169 #
 170 sub timeStampISO
 171 {
 172     my($bpc, $t, $noPad) = @_;
 173     my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)
 174               = localtime($t || time);
 175     $year += 1900;
 176     $mon++;
 177     return sprintf("%04d-%02d-%02d ", $year, $mon, $mday)
 178          . sprintf("%02d:%02d:%02d", $hour, $min, $sec)
 179          . ($noPad ? "" : " ");
 180 }
 181
 182 sub BackupInfoRead
 183 {
 184     my($bpc, $host) = @_;
 185     local(*BK_INFO, *LOCK);
 186     my(@Backups);
 187
 188     flock(LOCK, LOCK_EX) if open(LOCK, "$bpc->{TopDir}/pc/$host/LOCK");
 189     if ( open(BK_INFO, "$bpc->{TopDir}/pc/$host/backups") ) {
 190         while ( <BK_INFO> ) {
 191             s/[\n\r]+//;
 192             next if ( !/^(\d+\t(incr|full)[\d\t]*$)/ );
 193             $_ = $1;
 194             @{$Backups[@Backups]}{@{$bpc->{BackupFields}}} = split(/\t/);
 195         }
 196         close(BK_INFO);
 197     }
 198     close(LOCK);
 199     return @Backups;
 200 }
 201
 202 sub BackupInfoWrite
 203 {
 204     my($bpc, $host, @Backups) = @_;
 205     local(*BK_INFO, *LOCK);
 206     my($i);
 207
 208     flock(LOCK, LOCK_EX) if open(LOCK, "$bpc->{TopDir}/pc/$host/LOCK");
 209     unlink("$bpc->{TopDir}/pc/$host/backups.old")
 210                 if ( -f "$bpc->{TopDir}/pc/$host/backups.old" );
 211     rename("$bpc->{TopDir}/pc/$host/backups",
 212            "$bpc->{TopDir}/pc/$host/backups.old")
 213                 if ( -f "$bpc->{TopDir}/pc/$host/backups" );
 214     if ( open(BK_INFO, ">$bpc->{TopDir}/pc/$host/backups") ) {
 215         for ( $i = 0 ; $i < @Backups ; $i++ ) {
 216             my %b = %{$Backups[$i]};
 217             printf(BK_INFO "%s\n", join("\t", @b{@{$bpc->{BackupFields}}}));
 218         }
 219         close(BK_INFO);
 220     }
 221     close(LOCK);
 222 }
 223
 224 sub RestoreInfoRead
 225 {
 226     my($bpc, $host) = @_;
 227     local(*RESTORE_INFO, *LOCK);
 228     my(@Restores);
 229
 230     flock(LOCK, LOCK_EX) if open(LOCK, "$bpc->{TopDir}/pc/$host/LOCK");
 231     if ( open(RESTORE_INFO, "$bpc->{TopDir}/pc/$host/restores") ) {
 232         while ( <RESTORE_INFO> ) {
 233             s/[\n\r]+//;
 234             next if ( !/^(\d+.*)/ );
 235             $_ = $1;
 236             @{$Restores[@Restores]}{@{$bpc->{RestoreFields}}} = split(/\t/);
 237         }
 238         close(RESTORE_INFO);
 239     }
 240     close(LOCK);
 241     return @Restores;
 242 }
 243
 244 sub RestoreInfoWrite
 245 {
 246     my($bpc, $host, @Restores) = @_;
 247     local(*RESTORE_INFO, *LOCK);
 248     my($i);
 249
 250     flock(LOCK, LOCK_EX) if open(LOCK, "$bpc->{TopDir}/pc/$host/LOCK");
 251     unlink("$bpc->{TopDir}/pc/$host/restores.old")
 252                 if ( -f "$bpc->{TopDir}/pc/$host/restores.old" );
 253     rename("$bpc->{TopDir}/pc/$host/restores",
 254            "$bpc->{TopDir}/pc/$host/restores.old")
 255                 if ( -f "$bpc->{TopDir}/pc/$host/restores" );
 256     if ( open(RESTORE_INFO, ">$bpc->{TopDir}/pc/$host/restores") ) {
 257         for ( $i = 0 ; $i < @Restores ; $i++ ) {
 258             my %b = %{$Restores[$i]};
 259             printf(RESTORE_INFO "%s\n",
 260                         join("\t", @b{@{$bpc->{RestoreFields}}}));
 261         }
 262         close(RESTORE_INFO);
 263     }
 264     close(LOCK);
 265 }
 266
 267 sub ConfigRead
 268 {
 269     my($bpc, $host) = @_;
 270     my($ret, $mesg, $config, @configs);
 271
 272     $bpc->{Conf} = ();
 273     push(@configs, "$bpc->{TopDir}/conf/config.pl");
 274     push(@configs, "$bpc->{TopDir}/conf/$host.pl")
 275             if ( $host ne "config" && -f "$bpc->{TopDir}/conf/$host.pl" );
 276     push(@configs, "$bpc->{TopDir}/pc/$host/config.pl")
 277             if ( defined($host) && -f "$bpc->{TopDir}/pc/$host/config.pl" );
 278     foreach $config ( @configs ) {
 279         %Conf = ();
 280         if ( !defined($ret = do $config) && ($! || $@) ) {
 281             $mesg = "Couldn't open $config: $!" if ( $! );
 282             $mesg = "Couldn't execute $config: $@" if ( $@ );
 283             $mesg =~ s/[\n\r]+//;
 284             return $mesg;
 285         }
 286         %{$bpc->{Conf}} = ( %{$bpc->{Conf} || {}}, %Conf );
 287     }
 288     return if ( !defined($bpc->{Conf}{Language}) );
 289     if ( defined($bpc->{Conf}{PerlModuleLoad}) ) {
 290         #
 291         # Load any user-specified perl modules.  This is for
 292         # optional user-defined extensions.
 293         #
 294         $bpc->{Conf}{PerlModuleLoad} = [$bpc->{Conf}{PerlModuleLoad}]
 295                     if ( ref($bpc->{Conf}{PerlModuleLoad}) ne "ARRAY" );
 296         foreach my $module ( @{$bpc->{Conf}{PerlModuleLoad}} ) {
 297             eval("use $module;");
 298         }
 299     }
 300     my $langFile = "$bpc->{LibDir}/BackupPC/Lang/$bpc->{Conf}{Language}.pm";
 301     if ( !defined($ret = do $langFile) && ($! || $@) ) {
 302         $mesg = "Couldn't open language file $langFile: $!" if ( $! );
 303         $mesg = "Couldn't execute language file $langFile: $@" if ( $@ );
 304         $mesg =~ s/[\n\r]+//;
 305         return $mesg;
 306     }
 307     $bpc->{Lang} = \%Lang;
 308     return;
 309 }
 310
 311 #
 312 # Return the mtime of the config file
 313 #
 314 sub ConfigMTime
 315 {
 316     my($bpc) = @_;
 317     return (stat("$bpc->{TopDir}/conf/config.pl"))[9];
 318 }
 319
 320 #
 321 # Returns information from the host file in $bpc->{TopDir}/conf/hosts.
 322 # With no argument a ref to a hash of hosts is returned.  Each
 323 # hash contains fields as specified in the hosts file.  With an
 324 # argument a ref to a single hash is returned with information
 325 # for just that host.
 326 #
 327 sub HostInfoRead
 328 {
 329     my($bpc, $host) = @_;
 330     my(%hosts, @hdr, @fld);
 331     local(*HOST_INFO);
 332
 333     if ( !open(HOST_INFO, "$bpc->{TopDir}/conf/hosts") ) {
 334         print(STDERR $bpc->timeStamp,
 335                      "Can't open $bpc->{TopDir}/conf/hosts\n");
 336         return {};
 337     }
 338     while ( <HOST_INFO> ) {
 339         s/[\n\r]+//;
 340         s/#.*//;
 341         s/\s+$//;
 342         next if ( /^\s*$/ || !/^([\w\.\\-]+\s+.*)/ );
 343         #
 344         # Split on white space, except if preceded by \
 345         # using zero-width negative look-behind assertion
 346         # (always wanted to use one of those).
 347         #
 348         @fld = split(/(?<!\\)\s+/, $1);
 349         #
 350         # Remove any \
 351         #
 352         foreach ( @fld ) {
 353             s{\\(\s)}{$1}g;
 354         }
 355         if ( @hdr ) {
 356             if ( defined($host) ) {
 357                 next if ( lc($fld[0]) ne $host );
 358                 @{$hosts{lc($fld[0])}}{@hdr} = @fld;
 359                 close(HOST_INFO);
 360                 return \%hosts;
 361             } else {
 362                 @{$hosts{lc($fld[0])}}{@hdr} = @fld;
 363             }
 364         } else {
 365             @hdr = @fld;
 366         }
 367     }
 368     close(HOST_INFO);
 369     return \%hosts;
 370 }
 371
 372 #
 373 # Return the mtime of the hosts file
 374 #
 375 sub HostsMTime
 376 {
 377     my($bpc) = @_;
 378     return (stat("$bpc->{TopDir}/conf/hosts"))[9];
 379 }
 380
 381 #
 382 # Stripped down from File::Path.  In particular we don't print
 383 # many warnings and we try three times to delete each directory
 384 # and file -- for some reason the original File::Path rmtree
 385 # didn't always completely remove a directory tree on the NetApp.
 386 #
 387 # Warning: this routine changes the cwd.
 388 #
 389 sub RmTreeQuiet
 390 {
 391     my($bpc, $pwd, $roots) = @_;
 392     my(@files, $root);
 393
 394     if ( defined($roots) && length($roots) ) {
 395       $roots = [$roots] unless ref $roots;
 396     } else {
 397       print "RmTreeQuiet: No root path(s) specified\n";
 398     }
 399     chdir($pwd);
 400     foreach $root (@{$roots}) {
 401         $root = $1 if ( $root =~ m{(.*?)/*$} );
 402         #
 403         # Try first to simply unlink the file: this avoids an
 404         # extra stat for every file.  If it fails (which it
 405         # will for directories), check if it is a directory and
 406         # then recurse.
 407         #
 408         if ( !unlink($root) ) {
 409             if ( -d $root ) {
 410                 my $d = DirHandle->new($root)
 411                   or print "Can't read $pwd/$root: $!";
 412                 @files = $d->read;
 413                 $d->close;
 414                 @files = grep $_!~/^\.{1,2}$/, @files;
 415                 $bpc->RmTreeQuiet("$pwd/$root", \@files);
 416                 chdir($pwd);
 417                 rmdir($root) || rmdir($root);
 418             } else {
 419                 unlink($root) || unlink($root);
 420             }
 421         }
 422     }
 423 }
 424
 425 #
 426 # Move a directory or file away for later deletion
 427 #
 428 sub RmTreeDefer
 429 {
 430     my($bpc, $trashDir, $file) = @_;
 431     my($i, $f);
 432
 433     return if ( !-e $file );
 434     mkpath($trashDir, 0, 0777) if ( !-d $trashDir );
 435     for ( $i = 0 ; $i < 1000 ; $i++ ) {
 436         $f = sprintf("%s/%d_%d_%d", $trashDir, time, $$, $i);
 437         next if ( -e $f );
 438         return if ( rename($file, $f) );
 439     }
 440     # shouldn't get here, but might if you tried to call this
 441     # across file systems.... just remove the tree right now.
 442     if ( $file =~ /(.*)\/([^\/]*)/ ) {
 443         my($d) = $1;
 444         my($f) = $2;
 445         my($cwd) = Cwd::fastcwd();
 446         $cwd = $1 if ( $cwd =~ /(.*)/ );
 447         $bpc->RmTreeQuiet($d, $f);
 448         chdir($cwd) if ( $cwd );
 449     }
 450 }
 451
 452 #
 453 # Empty the trash directory.  Returns 0 if it did nothing, 1 if it
 454 # did something, -1 if it failed to remove all the files.
 455 #
 456 sub RmTreeTrashEmpty
 457 {
 458     my($bpc, $trashDir) = @_;
 459     my(@files);
 460     my($cwd) = Cwd::fastcwd();
 461
 462     $cwd = $1 if ( $cwd =~ /(.*)/ );
 463     return if ( !-d $trashDir );
 464     my $d = DirHandle->new($trashDir) or carp "Can't read $trashDir: $!";
 465     @files = $d->read;
 466     $d->close;
 467     @files = grep $_!~/^\.{1,2}$/, @files;
 468     return 0 if ( !@files );
 469     $bpc->RmTreeQuiet($trashDir, \@files);
 470     foreach my $f ( @files ) {
 471         return -1 if ( -e $f );
 472     }
 473     chdir($cwd) if ( $cwd );
 474     return 1;
 475 }
 476
 477 #
 478 # Open a connection to the server.  Returns an error string on failure.
 479 # Returns undef on success.
 480 #
 481 sub ServerConnect
 482 {
 483     my($bpc, $host, $port, $justConnect) = @_;
 484     local(*FH);
 485
 486     return if ( defined($bpc->{ServerFD}) );
 487     #
 488     # First try the unix-domain socket
 489     #
 490     my $sockFile = "$bpc->{TopDir}/log/BackupPC.sock";
 491     socket(*FH, PF_UNIX, SOCK_STREAM, 0)     || return "unix socket: $!";
 492     if ( !connect(*FH, sockaddr_un($sockFile)) ) {
 493         my $err = "unix connect: $!";
 494         close(*FH);
 495         if ( $port > 0 ) {
 496             my $proto = getprotobyname('tcp');
 497             my $iaddr = inet_aton($host)     || return "unknown host $host";
 498             my $paddr = sockaddr_in($port, $iaddr);
 499
 500             socket(*FH, PF_INET, SOCK_STREAM, $proto)
 501                                              || return "inet socket: $!";
 502             connect(*FH, $paddr)             || return "inet connect: $!";
 503         } else {
 504             return $err;
 505         }
 506     }
 507     my($oldFH) = select(*FH); $| = 1; select($oldFH);
 508     $bpc->{ServerFD} = *FH;
 509     return if ( $justConnect );
 510     #
 511     # Read the seed that we need for our MD5 message digest.  See
 512     # ServerMesg below.
 513     #
 514     sysread($bpc->{ServerFD}, $bpc->{ServerSeed}, 1024);
 515     $bpc->{ServerMesgCnt} = 0;
 516     return;
 517 }
 518
 519 #
 520 # Check that the server connection is still ok
 521 #
 522 sub ServerOK
 523 {
 524     my($bpc) = @_;
 525
 526     return 0 if ( !defined($bpc->{ServerFD}) );
 527     vec(my $FDread, fileno($bpc->{ServerFD}), 1) = 1;
 528     my $ein = $FDread;
 529     return 0 if ( select(my $rout = $FDread, undef, $ein, 0.0) < 0 );
 530     return 1 if ( !vec($rout, fileno($bpc->{ServerFD}), 1) );
 531 }
 532
 533 #
 534 # Disconnect from the server
 535 #
 536 sub ServerDisconnect
 537 {
 538     my($bpc) = @_;
 539     return if ( !defined($bpc->{ServerFD}) );
 540     close($bpc->{ServerFD});
 541     delete($bpc->{ServerFD});
 542 }
 543
 544 #
 545 # Sends a message to the server and returns with the reply.
 546 #
 547 # To avoid possible attacks via the TCP socket interface, every client
 548 # message is protected by an MD5 digest. The MD5 digest includes four
 549 # items:
 550 #   - a seed that is sent to us when we first connect
 551 #   - a sequence number that increments for each message
 552 #   - a shared secret that is stored in $Conf{ServerMesgSecret}
 553 #   - the message itself.
 554 # The message is sent in plain text preceded by the MD5 digest. A
 555 # snooper can see the plain-text seed sent by BackupPC and plain-text
 556 # message, but cannot construct a valid MD5 digest since the secret in
 557 # $Conf{ServerMesgSecret} is unknown. A replay attack is not possible
 558 # since the seed changes on a per-connection and per-message basis.
 559 #
 560 sub ServerMesg
 561 {
 562     my($bpc, $mesg) = @_;
 563     return if ( !defined(my $fh = $bpc->{ServerFD}) );
 564     my $md5 = Digest::MD5->new;
 565     $md5->add($bpc->{ServerSeed} . $bpc->{ServerMesgCnt}
 566             . $bpc->{Conf}{ServerMesgSecret} . $mesg);
 567     print($fh $md5->b64digest . " $mesg\n");
 568     $bpc->{ServerMesgCnt}++;
 569     return <$fh>;
 570 }
 571
 572 #
 573 # Do initialization for child processes
 574 #
 575 sub ChildInit
 576 {
 577     my($bpc) = @_;
 578     close(STDERR);
 579     open(STDERR, ">&STDOUT");
 580     select(STDERR); $| = 1;
 581     select(STDOUT); $| = 1;
 582     $ENV{PATH} = $bpc->{Conf}{MyPath};
 583 }
 584
 585 #
 586 # Compute the MD5 digest of a file.  For efficiency we don't
 587 # use the whole file for big files:
 588 #   - for files <= 256K we use the file size and the whole file.
 589 #   - for files <= 1M we use the file size, the first 128K and
 590 #     the last 128K.
 591 #   - for files > 1M, we use the file size, the first 128K and
 592 #     the 8th 128K (ie: the 128K up to 1MB).
 593 # See the documentation for a discussion of the tradeoffs in
 594 # how much data we use and how many collisions we get.
 595 #
 596 # Returns the MD5 digest (a hex string) and the file size.
 597 #
 598 sub File2MD5
 599 {
 600     my($bpc, $md5, $name) = @_;
 601     my($data, $fileSize);
 602     local(*N);
 603
 604     $fileSize = (stat($name))[7];
 605     return ("", -1) if ( !-f _ );
 606     $name = $1 if ( $name =~ /(.*)/ );
 607     return ("", 0) if ( $fileSize == 0 );
 608     return ("", -1) if ( !open(N, $name) );
 609     $md5->reset();
 610     $md5->add($fileSize);
 611     if ( $fileSize > 262144 ) {
 612         #
 613         # read the first and last 131072 bytes of the file,
 614         # up to 1MB.
 615         #
 616         my $seekPosn = ($fileSize > 1048576 ? 1048576 : $fileSize) - 131072;
 617         $md5->add($data) if ( sysread(N, $data, 131072) );
 618         $md5->add($data) if ( sysseek(N, $seekPosn, 0)
 619                                 && sysread(N, $data, 131072) );
 620     } else {
 621         #
 622         # read the whole file
 623         #
 624         $md5->add($data) if ( sysread(N, $data, $fileSize) );
 625     }
 626     close(N);
 627     return ($md5->hexdigest, $fileSize);
 628 }
 629
 630 #
 631 # Compute the MD5 digest of a buffer (string).  For efficiency we don't
 632 # use the whole string for big strings:
 633 #   - for files <= 256K we use the file size and the whole file.
 634 #   - for files <= 1M we use the file size, the first 128K and
 635 #     the last 128K.
 636 #   - for files > 1M, we use the file size, the first 128K and
 637 #     the 8th 128K (ie: the 128K up to 1MB).
 638 # See the documentation for a discussion of the tradeoffs in
 639 # how much data we use and how many collisions we get.
 640 #
 641 # Returns the MD5 digest (a hex string).
 642 #
 643 sub Buffer2MD5
 644 {
 645     my($bpc, $md5, $fileSize, $dataRef) = @_;
 646
 647     $md5->reset();
 648     $md5->add($fileSize);
 649     if ( $fileSize > 262144 ) {
 650         #
 651         # add the first and last 131072 bytes of the string,
 652         # up to 1MB.
 653         #
 654         my $seekPosn = ($fileSize > 1048576 ? 1048576 : $fileSize) - 131072;
 655         $md5->add(substr($$dataRef, 0, 131072));
 656         $md5->add(substr($$dataRef, $seekPosn, 131072));
 657     } else {
 658         #
 659         # add the whole string
 660         #
 661         $md5->add($$dataRef);
 662     }
 663     return $md5->hexdigest;
 664 }
 665
 666 #
 667 # Given an MD5 digest $d and a compress flag, return the full
 668 # path in the pool.
 669 #
 670 sub MD52Path
 671 {
 672     my($bpc, $d, $compress, $poolDir) = @_;
 673
 674     return if ( $d !~ m{(.)(.)(.)(.*)} );
 675     $poolDir = ($compress ? $bpc->{CPoolDir} : $bpc->{PoolDir})
 676                     if ( !defined($poolDir) );
 677     return "$poolDir/$1/$2/$3/$1$2$3$4";
 678 }
 679
 680 #
 681 # For each file, check if the file exists in $bpc->{TopDir}/pool.
 682 # If so, remove the file and make a hardlink to the file in
 683 # the pool.  Otherwise, if the newFile flag is set, make a
 684 # hardlink in the pool to the new file.
 685 #
 686 # Returns 0 if a link should be made to a new file (ie: when the file
 687 #    is a new file but the newFile flag is 0).
 688 # Returns 1 if a link to an existing file is made,
 689 # Returns 2 if a link to a new file is made (only if $newFile is set)
 690 # Returns negative on error.
 691 #
 692 sub MakeFileLink
 693 {
 694     my($bpc, $name, $d, $newFile, $compress) = @_;
 695     my($i, $rawFile);
 696
 697     return -1 if ( !-f $name );
 698     for ( $i = -1 ; ; $i++ ) {
 699         return -2 if ( !defined($rawFile = $bpc->MD52Path($d, $compress)) );
 700         $rawFile .= "_$i" if ( $i >= 0 );
 701         if ( -f $rawFile ) {
 702             if ( !compare($name, $rawFile) ) {
 703                 unlink($name);
 704                 return -3 if ( !link($rawFile, $name) );
 705                 return 1;
 706             }
 707         } elsif ( $newFile && -f $name && (stat($name))[3] == 1 ) {
 708             my($newDir);
 709             ($newDir = $rawFile) =~ s{(.*)/.*}{$1};
 710             mkpath($newDir, 0, 0777) if ( !-d $newDir );
 711             return -4 if ( !link($name, $rawFile) );
 712             return 2;
 713         } else {
 714             return 0;
 715         }
 716     }
 717 }
 718
 719 sub CheckHostAlive
 720 {
 721     my($bpc, $host) = @_;
 722     my($s, $pingCmd, $ret);
 723
 724     #
 725     # Return success if the ping cmd is undefined or empty.
 726     #
 727     if ( $bpc->{Conf}{PingCmd} eq "" ) {
 728         print("CheckHostAlive: return ok because \$Conf{PingCmd} is empty\n")
 729                         if ( $bpc->{verbose} );
 730         return 0;
 731     }
 732
 733     my $args = {
 734         pingPath => $bpc->{Conf}{PingPath},
 735         host     => $host,
 736     };
 737     $pingCmd = $bpc->cmdVarSubstitute($bpc->{Conf}{PingCmd}, $args);
 738
 739     #
 740     # Do a first ping in case the PC needs to wakeup
 741     #
 742     $s = $bpc->cmdSystemOrEval($pingCmd, undef, $args);
 743     if ( $? ) {
 744         print("CheckHostAlive: first ping failed ($?, $!)\n")
 745                         if ( $bpc->{verbose} );
 746         return -1;
 747     }
 748
 749     #
 750     # Do a second ping and get the round-trip time in msec
 751     #
 752     $s = $bpc->cmdSystemOrEval($pingCmd, undef, $args);
 753     if ( $? ) {
 754         print("CheckHostAlive: second ping failed ($?, $!)\n")
 755                         if ( $bpc->{verbose} );
 756         return -1;
 757     }
 758     if ( $s =~ /time=([\d\.]+)\s*ms/i ) {
 759         $ret = $1;
 760     } elsif ( $s =~ /time=([\d\.]+)\s*usec/i ) {
 761         $ret =  $1/1000;
 762     } else {
 763         print("CheckHostAlive: can't extract round-trip time (not fatal)\n")
 764                                 if ( $bpc->{verbose} );
 765         $ret = 0;
 766     }
 767     print("CheckHostAlive: returning $ret\n") if ( $bpc->{verbose} );
 768     return $ret;
 769 }
 770
 771 sub CheckFileSystemUsage
 772 {
 773     my($bpc) = @_;
 774     my($topDir) = $bpc->{TopDir};
 775     my($s, $dfCmd);
 776
 777     return 0 if ( $bpc->{Conf}{DfCmd} eq "" );
 778     my $args = {
 779         dfPath   => $bpc->{Conf}{DfPath},
 780         topDir   => $bpc->{TopDir},
 781     };
 782     $dfCmd = $bpc->cmdVarSubstitute($bpc->{Conf}{DfCmd}, $args);
 783     $s = $bpc->cmdSystemOrEval($dfCmd, undef, $args);
 784     return 0 if ( $? || $s !~ /(\d+)%/s );
 785     return $1;
 786 }
 787
 788 #
 789 # Given an IP address, return the host name and user name via
 790 # NetBios.
 791 #
 792 sub NetBiosInfoGet
 793 {
 794     my($bpc, $host) = @_;
 795     my($netBiosHostName, $netBiosUserName);
 796     my($s, $nmbCmd);
 797
 798     #
 799     # Skip NetBios check if NmbLookupCmd is emtpy
 800     #
 801     if ( $bpc->{Conf}{NmbLookupCmd} eq "" ) {
 802         print("NetBiosInfoGet: return $host because \$Conf{NmbLookupCmd}"
 803             . " is empty\n")
 804                 if ( $bpc->{verbose} );
 805         return ($host, undef);
 806     }
 807
 808     my $args = {
 809         nmbLookupPath => $bpc->{Conf}{NmbLookupPath},
 810         host          => $host,
 811     };
 812     $nmbCmd = $bpc->cmdVarSubstitute($bpc->{Conf}{NmbLookupCmd}, $args);
 813     foreach ( split(/[\n\r]+/, $bpc->cmdSystemOrEval($nmbCmd, undef, $args)) ) {
 814         next if ( !/^\s*([\w\s-]+?)\s*<(\w{2})\> - .*<ACTIVE>/i );
 815         $netBiosHostName ||= $1 if ( $2 eq "00" );  # host is first 00
 816         $netBiosUserName   = $1 if ( $2 eq "03" );  # user is last 03
 817     }
 818     if ( !defined($netBiosHostName) ) {
 819         print("NetBiosInfoGet: failed: can't parse return string\n")
 820                         if ( $bpc->{verbose} );
 821         return;
 822     }
 823     $netBiosHostName = lc($netBiosHostName);
 824     $netBiosUserName = lc($netBiosUserName);
 825     print("NetBiosInfoGet: success, returning host $netBiosHostName,"
 826         . " user $netBiosUserName\n")
 827                 if ( $bpc->{verbose} );
 828     return ($netBiosHostName, $netBiosUserName);
 829 }
 830
 831 #
 832 # Given a NetBios name lookup the IP address via NetBios.
 833 # In the case of a host returning multiple interfaces we
 834 # return the first IP address that matches the subnet mask.
 835 # If none match the subnet mask (or nmblookup doesn't print
 836 # the subnet mask) then just the first IP address is returned.
 837 #
 838 sub NetBiosHostIPFind
 839 {
 840     my($bpc, $host) = @_;
 841     my($netBiosHostName, $netBiosUserName);
 842     my($s, $nmbCmd, $subnet, $ipAddr, $firstIpAddr);
 843
 844     #
 845     # Skip NetBios lookup if NmbLookupFindHostCmd is emtpy
 846     #
 847     if ( $bpc->{Conf}{NmbLookupFindHostCmd} eq "" ) {
 848         print("NetBiosHostIPFind: return $host because"
 849             . " \$Conf{NmbLookupFindHostCmd} is empty\n")
 850                 if ( $bpc->{verbose} );
 851         return $host;
 852     }
 853
 854     my $args = {
 855         nmbLookupPath => $bpc->{Conf}{NmbLookupPath},
 856         host          => $host,
 857     };
 858     $nmbCmd = $bpc->cmdVarSubstitute($bpc->{Conf}{NmbLookupFindHostCmd}, $args);
 859     foreach my $resp ( split(/[\n\r]+/, $bpc->cmdSystemOrEval($nmbCmd, undef,
 860                                                               $args) ) ) {
 861         if ( $resp =~ /querying\s+\Q$host\E\s+on\s+(\d+\.\d+\.\d+\.\d+)/i ) {
 862             $subnet = $1;
 863             $subnet = $1 if ( $subnet =~ /^(.*?)(\.255)+$/ );
 864         } elsif ( $resp =~ /^\s*(\d+\.\d+\.\d+\.\d+)\s+\Q$host/ ) {
 865             my $ip = $1;
 866             $firstIpAddr = $ip if ( !defined($firstIpAddr) );
 867             $ipAddr      = $ip if ( !defined($ipAddr) && $ip =~ /^\Q$subnet/ );
 868         }
 869     }
 870     $ipAddr = $firstIpAddr if ( !defined($ipAddr) );
 871     if ( defined($ipAddr) ) {
 872         print("NetBiosHostIPFind: found IP address $ipAddr for host $host\n")
 873                         if ( $bpc->{verbose} );
 874         return $ipAddr;
 875     } else {
 876         print("NetBiosHostIPFind: couldn't find IP address for host $host\n")
 877                         if ( $bpc->{verbose} );
 878         return;
 879     }
 880 }
 881
 882 sub fileNameEltMangle
 883 {
 884     my($bpc, $name) = @_;
 885
 886     return "" if ( $name eq "" );
 887     $name =~ s{([%/\n\r])}{sprintf("%%%02x", ord($1))}eg;
 888     return "f$name";
 889 }
 890
 891 #
 892 # We store files with every name preceded by "f".  This
 893 # avoids possible name conflicts with other information
 894 # we store in the same directories (eg: attribute info).
 895 # The process of turning a normal path into one with each
 896 # node prefixed with "f" is called mangling.
 897 #
 898 sub fileNameMangle
 899 {
 900     my($bpc, $name) = @_;
 901
 902     $name =~ s{/([^/]+)}{"/" . $bpc->fileNameEltMangle($1)}eg;
 903     $name =~ s{^([^/]+)}{$bpc->fileNameEltMangle($1)}eg;
 904     return $name;
 905 }
 906
 907 #
 908 # This undoes FileNameMangle
 909 #
 910 sub fileNameUnmangle
 911 {
 912     my($bpc, $name) = @_;
 913
 914     $name =~ s{/f}{/}g;
 915     $name =~ s{^f}{};
 916     $name =~ s{%(..)}{chr(hex($1))}eg;
 917     return $name;
 918 }
 919
 920 #
 921 # Escape shell meta-characters with backslashes.
 922 # This should be applied to each argument seperately, not an
 923 # entire shell command.
 924 #
 925 sub shellEscape
 926 {
 927     my($bpc, $cmd) = @_;
 928
 929     $cmd =~ s/([][;&()<>{}|^\n\r\t *\$\\'"`?])/\\$1/g;
 930     return $cmd;
 931 }
 932
 933 #
 934 # For printing exec commands (which don't use a shell) so they look like
 935 # a valid shell command this function should be called with the exec
 936 # args.  The shell command string is returned.
 937 #
 938 sub execCmd2ShellCmd
 939 {
 940     my($bpc, @args) = @_;
 941     my $str;
 942
 943     foreach my $a ( @args ) {
 944         $str .= " " if ( $str ne "" );
 945         $str .= $bpc->shellEscape($a);
 946     }
 947     return $str;
 948 }
 949
 950 #
 951 # Do a URI-style escape to protect/encode special characters
 952 #
 953 sub uriEsc
 954 {
 955     my($bpc, $s) = @_;
 956     $s =~ s{([^\w.\/-])}{sprintf("%%%02X", ord($1));}eg;
 957     return $s;
 958 }
 959
 960 #
 961 # Do a URI-style unescape to restore special characters
 962 #
 963 sub uriUnesc
 964 {
 965     my($bpc, $s) = @_;
 966     $s =~ s{%(..)}{chr(hex($1))}eg;
 967     return $s;
 968 }
 969
 970 #
 971 # Do variable substitution prior to execution of a command.
 972 #
 973 sub cmdVarSubstitute
 974 {
 975     my($bpc, $template, $vars) = @_;
 976     my(@cmd);
 977
 978     #
 979     # Return without any substitution if the first entry starts with "&",
 980     # indicating this is perl code.
 981     #
 982     if ( (ref($template) eq "ARRAY" ? $template->[0] : $template) =~ /^\&/ ) {
 983         return $template;
 984     }
 985     if ( ref($template) ne "ARRAY" ) {
 986         #
 987         # Split at white space, except if escaped by \
 988         #
 989         $template = [split(/(?<!\\)\s+/, $template)];
 990         #
 991         # Remove the \ that escaped white space.
 992         #
 993         foreach ( @$template ) {
 994             s{\\(\s)}{$1}g;
 995         }
 996     }
 997     #
 998     # Merge variables into @tarClientCmd
 999     #
1000     foreach my $arg ( @$template ) {
1001         #
1002         # Replace scalar variables first
1003         #
1004         $arg =~ s{\$(\w+)(\+?)}{
1005             exists($vars->{$1}) && ref($vars->{$1}) ne "ARRAY"
1006                 ? ($2 eq "+" ? $bpc->shellEscape($vars->{$1}) : $vars->{$1})
1007                 : "\$$1"
1008         }eg;
1009         #
1010         # Now replicate any array arguments; this just works for just one
1011         # array var in each argument.
1012         #
1013         if ( $arg =~ m{(.*)\$(\w+)(\+?)(.*)} && ref($vars->{$2}) eq "ARRAY" ) {
1014             my $pre  = $1;
1015             my $var  = $2;
1016             my $esc  = $3;
1017             my $post = $4;
1018             foreach my $v ( @{$vars->{$var}} ) {
1019                 $v = $bpc->shellEscape($v) if ( $esc eq "+" );
1020                 push(@cmd, "$pre$v$post");
1021             }
1022         } else {
1023             push(@cmd, $arg);
1024         }
1025     }
1026     return \@cmd;
1027 }
1028
#
# Exec or eval a command.  $cmd is either a string or an array ref.
#
# If the command (or the first entry of the array) starts with "&"
# it is perl code: the array entries are joined with spaces and the
# result is eval()ed.  Otherwise the command is exec()ed; a string
# is first split into arguments at white space.
#
# @args are optional arguments for the eval() case; they are not used
# for exec().
#
# This function does not return: if the eval returns or the exec
# fails a message is written to STDERR and the process exits with
# status 1.
#
sub cmdExecOrEval
{
    my($bpc, $cmd, @args) = @_;

    if ( (ref($cmd) eq "ARRAY" ? $cmd->[0] : $cmd) =~ /^\&/ ) {
        #
        # Join the elements of the array, not the array ref itself
        # (which would give a string like "ARRAY(0x...)").
        #
        $cmd = join(" ", @$cmd) if ( ref($cmd) eq "ARRAY" );
        print("cmdExecOrEval: about to eval perl code $cmd\n")
                        if ( $bpc->{verbose} );
        eval($cmd);
        #
        # Report why the code failed, if it did, before the generic
        # message below.
        #
        print(STDERR "cmdExecOrEval: eval failed: $@\n") if ( $@ ne "" );
        print(STDERR "Perl code fragment for exec shouldn't return!!\n");
        exit(1);
    } else {
        #
        # split(' ') ignores leading white space, so the program name
        # is never an empty string.
        #
        $cmd = [split(' ', $cmd)] if ( ref($cmd) ne "ARRAY" );
        print("cmdExecOrEval: about to exec ",
              $bpc->execCmd2ShellCmd(@$cmd), "\n")
                        if ( $bpc->{verbose} );
        exec(@$cmd);
        print(STDERR "Exec failed for @$cmd\n");
        exit(1);
    }
}
1056
#
# System or eval a command.  $cmd is either a string or an array ref.
# $stdoutCB is a callback for output generated by the command.  If it
# is undef then output is returned.  If it is a code ref then the function
# is called with each piece of output as an argument.  If it is a scalar
# ref the output is appended to this variable.
#
# If the command (or the first entry of the array) starts with "&"
# it is perl code: the array entries are joined with spaces and the
# result is eval()ed, and the value of the eval is the output.
# Otherwise the command is run in a forked child with its STDERR
# redirected to STDOUT, and the parent reads the child's output
# one line at a time.
#
# @args are optional arguments for the eval() case; they are not used
# for system().
#
# Also, $? should be set when the CHILD pipe is closed.  If the fork
# fails $? is set to 1 and an error message is delivered as the output.
#
sub cmdSystemOrEval
{
    my($bpc, $cmd, $stdoutCB, @args) = @_;
    my($pid, $out, $allOut);
    local(*CHILD);

    if ( (ref($cmd) eq "ARRAY" ? $cmd->[0] : $cmd) =~ /^\&/ ) {
        #
        # Join the elements of the array, not the array ref itself
        # (which would give a string like "ARRAY(0x...)").
        #
        $cmd = join(" ", @$cmd) if ( ref($cmd) eq "ARRAY" );
        print("cmdSystemOrEval: about to eval perl code $cmd\n")
                        if ( $bpc->{verbose} );
        $out = eval($cmd);
        $$stdoutCB .= $out if ( ref($stdoutCB) eq 'SCALAR' );
        &$stdoutCB($out)   if ( ref($stdoutCB) eq 'CODE' );
        print("cmdSystemOrEval: finished: got output $out\n")
                        if ( $bpc->{verbose} );
        return $out        if ( !defined($stdoutCB) );
        return;
    } else {
        #
        # split(' ') ignores leading white space, so the program name
        # is never an empty string.
        #
        $cmd = [split(' ', $cmd)] if ( ref($cmd) ne "ARRAY" );
        print("cmdSystemOrEval: about to system ",
              $bpc->execCmd2ShellCmd(@$cmd), "\n")
                        if ( $bpc->{verbose} );
        if ( !defined($pid = open(CHILD, "-|")) ) {
            my $err = "Can't fork to run @$cmd\n";
            $? = 1;
            $$stdoutCB .= $err if ( ref($stdoutCB) eq 'SCALAR' );
            &$stdoutCB($err)   if ( ref($stdoutCB) eq 'CODE' );
            return $err        if ( !defined($stdoutCB) );
            return;
        }
        if ( !$pid ) {
            #
            # This is the child.  Send STDERR to the same pipe as
            # STDOUT so the parent sees both.
            #
            close(STDERR);
            open(STDERR, ">&STDOUT");
            exec(@$cmd);
            print("Exec of @$cmd failed\n");
            exit(1);
        }
        #
        # The parent gathers the output from the child
        #
        while ( <CHILD> ) {
            $$stdoutCB .= $_ if ( ref($stdoutCB) eq 'SCALAR' );
            &$stdoutCB($_)   if ( ref($stdoutCB) eq 'CODE' );
            $out .= $_       if ( !defined($stdoutCB) );
            $allOut .= $_    if ( $bpc->{verbose} );
        }
        #
        # Clear $? first; closing the pipe then sets it from the child.
        #
        $? = 0;
        close(CHILD);
    }
    print("cmdSystemOrEval: finished: got output $allOut\n")
                        if ( $bpc->{verbose} );
    return $out;
}
1125
1126 1;