X-Git-Url: http://git.rot13.org/?p=BackupPC.git;a=blobdiff_plain;f=lib%2FBackupPC%2FXfer%2FRsyncFileIO.pm;h=3a0e82617e52947971fd6cb4d29f6dfed8d1a363;hp=cd0dc82dc7eb401009a346787b65185d29b9420b;hb=2460880d23902e74d5d8541f9c2fbb9538eaf4cb;hpb=3ec73efe5ee035c9cda24dc70500157e455d9c84 diff --git a/lib/BackupPC/Xfer/RsyncFileIO.pm b/lib/BackupPC/Xfer/RsyncFileIO.pm index cd0dc82..3a0e826 100644 --- a/lib/BackupPC/Xfer/RsyncFileIO.pm +++ b/lib/BackupPC/Xfer/RsyncFileIO.pm @@ -8,11 +8,11 @@ # Craig Barratt # # COPYRIGHT -# Copyright (C) 2002 Craig Barratt +# Copyright (C) 2002-2003 Craig Barratt # #======================================================================== # -# Version 2.0.0beta2, released 13 Apr 2003. +# Version 2.1.0_CVS, released 13 Mar 2004. # # See http://backuppc.sourceforge.net. # @@ -24,9 +24,8 @@ use strict; use File::Path; use BackupPC::Attrib qw(:all); use BackupPC::View; +use BackupPC::Xfer::RsyncDigest; use BackupPC::PoolWrite; -use BackupPC::PoolWrite; -use Data::Dumper; use constant S_IFMT => 0170000; # type of file use constant S_IFDIR => 0040000; # directory @@ -82,6 +81,7 @@ sub new $fio->{backups}); $fio->{full} = $fio->{xfer}{type} eq "full" ? 1 : 0; $fio->{newFilesFH} = $fio->{xfer}{newFilesFH}; + $fio->{partialNum} = undef if ( !$fio->{full} ); return $fio; } @@ -104,25 +104,23 @@ sub logHandlerSet # sub csumStart { - my($fio, $f, $needMD4) = @_; + my($fio, $f, $needMD4, $defBlkSize) = @_; my $attr = $fio->attribGet($f); $fio->{file} = $f; - $fio->csumEnd if ( defined($fio->{fh}) ); - return if ( $attr->{type} != BPC_FTYPE_FILE ); - if ( !defined($fio->{fh} = BackupPC::FileZIO->open($attr->{fullPath}, - 0, - $attr->{compress})) ) { - $fio->log("Can't open $attr->{fullPath} (name=$f->{name})"); + $fio->csumEnd if ( defined($fio->{csum}) ); + return -1 if ( $attr->{type} != BPC_FTYPE_FILE ); + (my $err, $fio->{csum}, my $blkSize) + = BackupPC::Xfer::RsyncDigest->digestStart($attr->{fullPath}, + $attr->{size}, 0, $defBlkSize, $fio->{checksumSeed}, + $needMD4, $attr->{compress}, 1); + if ( $err ) { + $fio->log("Can't get rsync digests from $attr->{fullPath}" + . " (err=$err, name=$f->{name})"); $fio->{stats}{errorCnt}++; return -1; } - if ( $needMD4) { - $fio->{csumDigest} = File::RsyncP::Digest->new; - $fio->{csumDigest}->add(pack("V", $fio->{checksumSeed})); - } else { - delete($fio->{csumDigest}); - } + return $blkSize; } sub csumGet @@ -132,40 +130,16 @@ sub csumGet $num ||= 100; $csumLen ||= 16; - - return if ( !defined($fio->{fh}) ); - if ( $fio->{fh}->read(\$fileData, $blockSize * $num) <= 0 ) { - $fio->log("$fio->{file}{name}: csumGet is at EOF - zero padding"); - $fio->{stats}{errorCnt}++; - $fileData = pack("c", 0) x ($blockSize * $num); - } - $fio->{csumDigest}->add($fileData) if ( defined($fio->{csumDigest}) ); - $fio->log(sprintf("%s: getting csum ($num,$csumLen,%d,0x%x)", - $fio->{file}{name}, - length($fileData), - $fio->{checksumSeed})) - if ( $fio->{logLevel} >= 9 ); - return $fio->{digest}->blockDigest($fileData, $blockSize, - $csumLen, $fio->{checksumSeed}); + return if ( !defined($fio->{csum}) ); + return $fio->{csum}->digestGet($num, $csumLen); } sub csumEnd { my($fio) = @_; - return if ( !defined($fio->{fh}) ); - # - # make sure we read the entire file for the file MD4 digest - # - if ( defined($fio->{csumDigest}) ) { - my $fileData; - while ( $fio->{fh}->read(\$fileData, 65536) > 0 ) { - $fio->{csumDigest}->add($fileData); - } - } - $fio->{fh}->close(); - delete($fio->{fh}); - return $fio->{csumDigest}->digest if ( defined($fio->{csumDigest}) ); + return if ( !defined($fio->{csum}) ); + return $fio->{csum}->digestEnd(); } sub readStart @@ -216,6 +190,10 @@ sub checksumSeed my($fio, $checksumSeed) = @_; $fio->{checksumSeed} = $checksumSeed; + $fio->log("Checksum caching enabled (checksumSeed = $checksumSeed)") + if ( $fio->{logLevel} >= 1 && $checksumSeed == 32761 ); + $fio->log("Checksum seed is $checksumSeed") + if ( $fio->{logLevel} >= 2 && $checksumSeed != 32761 ); } sub dirs @@ -252,9 +230,19 @@ sub viewCacheDir # $fio->{viewCache}{$shareM} = $fio->{view}->dirAttrib($fio->{viewNum}, $share, $dir); + # + # also cache partial backup attrib data too + # + if ( defined($fio->{partialNum}) ) { + foreach my $d ( keys(%{$fio->{partialCache}}) ) { + delete($fio->{partialCache}{$d}) if ( $shareM !~ m{^\Q$d/} ); + } + $fio->{partialCache}{$shareM} + = $fio->{view}->dirAttrib($fio->{partialNum}, $share, $dir); + } } -sub attribGet +sub attribGetWhere { my($fio, $f) = @_; my($dir, $fname, $share, $shareM); @@ -278,7 +266,21 @@ sub attribGet } $fio->viewCacheDir($share, $dir); $shareM .= "/$dir" if ( $dir ne "" ); - return $fio->{viewCache}{$shareM}{$fname}; + if ( defined(my $attr = $fio->{viewCache}{$shareM}{$fname}) ) { + return ($attr, 0); + } elsif ( defined(my $attr = $fio->{partialCache}{$shareM}{$fname}) ) { + return ($attr, 1); + } else { + return; + } +} + +sub attribGet +{ + my($fio, $f) = @_; + + my($attr) = $fio->attribGetWhere($f); + return $attr; } sub mode2type @@ -375,9 +377,10 @@ sub attribWrite return if ( !defined($fio->{attrib}{$d}) ); # # Set deleted files in the attributes. Any file in the view - # that doesn't have attributes is deleted. All files sent by - # rsync have attributes temporarily set so we can do deletion - # detection. We also prune these temporary attributes. + # that doesn't have attributes is flagged as deleted for + # incremental dumps. All files sent by rsync have attributes + # temporarily set so we can do deletion detection. We also + # prune these temporary attributes. # if ( $d ne "" ) { my $dir; @@ -407,7 +410,7 @@ sub attribWrite name => $name, }) if ( $fio->{logLevel} >= 2 ); } - } else { + } elsif ( !$fio->{full} ) { ##print("Delete file $f\n"); $fio->logFileAction("delete", { %{$fio->{viewCache}{$d}{$f}}, @@ -604,13 +607,46 @@ sub logFileAction } # -# Later we'll use this function to complete a prior unfinished dump. -# We'll do an incremental on the part we have already, and then a -# full or incremental against the rest. +# If there is a partial and we are doing a full, we do an incremental +# against the partial and a full against the rest. This subroutine +# is how we tell File::RsyncP which files to ignore attributes on +# (ie: against the partial dump we do consider the attributes, but +# otherwise we ignore attributes). # sub ignoreAttrOnFile { - return undef; + my($fio, $f) = @_; + + return if ( !defined($fio->{partialNum}) ); + my($attr, $isPartial) = $fio->attribGetWhere($f); + $fio->log("$f->{name}: just checking attributes from partial") + if ( $isPartial && $fio->{logLevel} >= 5 ); + return !$isPartial; +} + +# +# This is called by File::RsyncP when a file is skipped because the +# attributes match. +# +sub attrSkippedFile +{ + my($fio, $f, $attr) = @_; + + # + # Unless this is a partial, this is normal so ignore it. + # + return if ( !defined($fio->{partialNum}) ); + + $fio->log("$f->{name}: skipped in partial; adding link") + if ( $fio->{logLevel} >= 5 ); + $fio->{rxLocalAttr} = $attr; + $fio->{rxFile} = $f; + $fio->{rxSize} = $attr->{size}; + delete($fio->{rxInFd}); + delete($fio->{rxOutFd}); + delete($fio->{rxDigest}); + delete($fio->{rxInData}); + return $fio->fileDeltaRxDone(); } # @@ -696,8 +732,7 @@ sub fileDeltaRxNext # Need to copy the sequence of blocks that matched. If the file # is compressed we need to make a copy of the uncompressed file, # since the compressed file is not seekable. Future optimizations - # would be to keep the uncompressed file in memory (eg, up to say - # 10MB), only create an uncompressed copy if the matching + # could include only creating an uncompressed copy if the matching # blocks were not monotonic, and to only do this if there are # matching blocks (eg, maybe the entire file is new). # @@ -734,6 +769,7 @@ sub fileDeltaRxNext if ( open(F, "+>", "$fio->{outDirSh}RStmp") ) { my $data; my $byteCnt = 0; + binmode(F); while ( $fh->read(\$data, 1024 * 1024) > 0 ) { if ( syswrite(F, $data) != length($data) ) { $fio->log(sprintf("Can't write len=%d to %s", @@ -760,6 +796,7 @@ sub fileDeltaRxNext $fh->close; } else { if ( open(F, "<", $attr->{fullPath}) ) { + binmode(F); $fio->{rxInFd} = *F; $fio->{rxInName} = $attr->{fullPath}; } else { @@ -840,51 +877,70 @@ sub fileDeltaRxDone { my($fio, $md4) = @_; my $name = $1 if ( $fio->{rxFile}{name} =~ /(.*)/ ); + my $ret; - if ( !defined($fio->{rxDigest}) ) { - # - # File was exact match, but we still need to verify the - # MD4 checksum. Therefore open and read the file. - # - $fio->{rxDigest} = File::RsyncP::Digest->new; - $fio->{rxDigest}->add(pack("V", $fio->{checksumSeed})); - my $attr = $fio->{rxLocalAttr}; - if ( defined($attr) ) { - if ( defined(my $fh = BackupPC::FileZIO->open( - $attr->{fullPath}, - 0, - $attr->{compress})) ) { - my $data; - while ( $fh->read(\$data, 4 * 65536) > 0 ) { - $fio->{rxDigest}->add($data); - $fio->{rxSize} += length($data); - } - $fh->close; - } else { - $fio->log("Can't open $attr->{fullPath} for MD4 check ($name)"); - $fio->{stats}{errorCnt}++; - } - } - $fio->log("$name got exact match") - if ( $fio->{logLevel} >= 5 ); - } close($fio->{rxInFd}) if ( defined($fio->{rxInFd}) ); unlink("$fio->{outDirSh}RStmp") if ( -f "$fio->{outDirSh}RStmp" ); - my $newDigest = $fio->{rxDigest}->digest; - if ( $fio->{logLevel} >= 3 ) { - my $md4Str = unpack("H*", $md4); - my $newStr = unpack("H*", $newDigest); - $fio->log("$name got digests $md4Str vs $newStr") - } - if ( $md4 ne $newDigest ) { - $fio->log("$name: fatal error: md4 doesn't match"); - $fio->{stats}{errorCnt}++; - if ( defined($fio->{rxOutFd}) ) { - $fio->{rxOutFd}->close; - unlink($fio->{rxOutFile}); + + # + # Check the final md4 digest + # + if ( defined($md4) ) { + my $newDigest; + if ( !defined($fio->{rxDigest}) ) { + # + # File was exact match, but we still need to verify the + # MD4 checksum. Compute the md4 digest (or fetch the + # cached one.) + # + if ( defined(my $attr = $fio->{rxLocalAttr}) ) { + # + # block size doesn't matter: we're only going to + # fetch the md4 file digest, not the block digests. + # + my($err, $csum, $blkSize) + = BackupPC::Xfer::RsyncDigest->digestStart( + $attr->{fullPath}, $attr->{size}, + 0, 2048, $fio->{checksumSeed}, 1, + $attr->{compress}); + if ( $err ) { + $fio->log("Can't open $attr->{fullPath} for MD4" + . " check (err=$err, $name)"); + $fio->{stats}{errorCnt}++; + } else { + $newDigest = $csum->digestEnd; + } + $fio->{rxSize} = $attr->{size}; + } else { + # + # Empty file; just create an empty file digest + # + $fio->{rxDigest} = File::RsyncP::Digest->new; + $fio->{rxDigest}->add(pack("V", $fio->{checksumSeed})); + $newDigest = $fio->{rxDigest}->digest; + } + $fio->log("$name got exact match") if ( $fio->{logLevel} >= 5 ); + } else { + $newDigest = $fio->{rxDigest}->digest; + } + if ( $fio->{logLevel} >= 3 ) { + my $md4Str = unpack("H*", $md4); + my $newStr = unpack("H*", $newDigest); + $fio->log("$name got digests $md4Str vs $newStr") + } + if ( $md4 ne $newDigest ) { + $fio->log("$name: fatal error: md4 doesn't match"); + $fio->{stats}{errorCnt}++; + if ( defined($fio->{rxOutFd}) ) { + $fio->{rxOutFd}->close; + unlink($fio->{rxOutFile}); + } + delete($fio->{rxFile}); + delete($fio->{rxOutFile}); + return 1; } - return 1; } + # # One special case is an empty file: if the file size is # zero we need to open the output file to create it. @@ -922,32 +978,34 @@ sub fileDeltaRxDone if ( !link($attr->{fullPath}, $rxOutFile) ) { $fio->log("Unable to link $attr->{fullPath} to $rxOutFile"); $fio->{stats}{errorCnt}++; - return -1; - } - # - # Cumulate the stats - # - $fio->{stats}{TotalFileCnt}++; - $fio->{stats}{TotalFileSize} += $fio->{rxSize}; - $fio->{stats}{ExistFileCnt}++; - $fio->{stats}{ExistFileSize} += $fio->{rxSize}; - $fio->{stats}{ExistFileCompSize} += -s $rxOutFile; - $fio->{rxFile}{size} = $fio->{rxSize}; - return $fio->attribSet($fio->{rxFile}); + $ret = -1; + } else { + # + # Cumulate the stats + # + $fio->{stats}{TotalFileCnt}++; + $fio->{stats}{TotalFileSize} += $fio->{rxSize}; + $fio->{stats}{ExistFileCnt}++; + $fio->{stats}{ExistFileSize} += $fio->{rxSize}; + $fio->{stats}{ExistFileCompSize} += -s $rxOutFile; + $fio->{rxFile}{size} = $fio->{rxSize}; + $ret = $fio->attribSet($fio->{rxFile}); + } } - } - if ( defined($fio->{rxOutFd}) ) { + } else { my $exist = $fio->processClose($fio->{rxOutFd}, $fio->{rxOutFileRel}, $fio->{rxSize}, 1); $fio->logFileAction($exist ? "pool" : "create", $fio->{rxFile}) if ( $fio->{logLevel} >= 1 ); $fio->{rxFile}{size} = $fio->{rxSize}; - return $fio->attribSet($fio->{rxFile}); + $ret = $fio->attribSet($fio->{rxFile}); } delete($fio->{rxDigest}); delete($fio->{rxInData}); - return; + delete($fio->{rxFile}); + delete($fio->{rxOutFile}); + return $ret; } # @@ -968,26 +1026,29 @@ sub fileListEltSend || $type == BPC_FTYPE_BLOCKDEV || $type == BPC_FTYPE_SYMLINK ) { my $fh = BackupPC::FileZIO->open($a->{fullPath}, 0, $a->{compress}); - my $str; + my($str, $rdSize); if ( defined($fh) ) { - if ( $fh->read(\$str, $a->{size} + 1) == $a->{size} ) { - if ( $type == BPC_FTYPE_SYMLINK ) { - # - # Reconstruct symbolic link - # - $extraAttribs = { link => $str }; - } elsif ( $str =~ /(\d*),(\d*)/ ) { - # - # Reconstruct char or block special major/minor device num - # - $extraAttribs = { rdev => $1 * 256 + $2 }; - } else { - $fio->log("$name: unexpected special file contents $str"); + $rdSize = $fh->read(\$str, $a->{size} + 1024); + if ( $type == BPC_FTYPE_SYMLINK ) { + # + # Reconstruct symbolic link + # + $extraAttribs = { link => $str }; + if ( $rdSize != $a->{size} ) { + # ERROR + $fio->log("$name: can't read exactly $a->{size} bytes"); $fio->{stats}{errorCnt}++; } + } elsif ( $str =~ /(\d*),(\d*)/ ) { + # + # Reconstruct char or block special major/minor device num + # + # Note: char/block devices have $a->{size} = 0, so we + # can't do an error check on $rdSize. + # + $extraAttribs = { rdev => $1 * 256 + $2 }; } else { - # ERROR - $fio->log("$name: can't read exactly $a->{size} bytes"); + $fio->log("$name: unexpected special file contents $str"); $fio->{stats}{errorCnt}++; } $fh->close; @@ -1044,10 +1105,22 @@ sub finish { my($fio, $isChild) = @_; + # + # If we are aborting early, remove the last file since + # it was not complete + # + if ( $isChild && defined($fio->{rxFile}) ) { + unlink("$fio->{outDirSh}RStmp") if ( -f "$fio->{outDirSh}RStmp" ); + if ( defined($fio->{rxFile}) ) { + unlink($fio->{rxOutFile}); + $fio->log("finish: removing in-process file $fio->{rxFile}{name}"); + } + } + # # Flush the attributes if this is the child # - $fio->attribWrite(undef); + $fio->attribWrite(undef) if ( $isChild ); } #sub is_tainted