# Craig Barratt <cbarratt@users.sourceforge.net>
#
# COPYRIGHT
-# Copyright (C) 2002 Craig Barratt
+# Copyright (C) 2002-2003 Craig Barratt
#
#========================================================================
#
-# Version 2.0.0beta1, released 30 Mar 2003.
+# Version 3.0.0, released 28 Jan 2007.
#
# See http://backuppc.sourceforge.net.
#
use strict;
use File::Path;
+use Encode qw/from_to/;
use BackupPC::Attrib qw(:all);
use BackupPC::View;
+use BackupPC::Xfer::RsyncDigest qw(:all);
use BackupPC::PoolWrite;
-use BackupPC::PoolWrite;
-use Data::Dumper;
-use constant S_IFMT => 0170000; # type of file
-use constant S_IFDIR => 0040000; # directory
-use constant S_IFCHR => 0020000; # character special
-use constant S_IFBLK => 0060000; # block special
-use constant S_IFREG => 0100000; # regular
-use constant S_IFLNK => 0120000; # symbolic link
-use constant S_IFSOCK => 0140000; # socket
-use constant S_IFIFO => 0010000; # fifo
+use constant S_HLINK_TARGET => 0400000; # this file is hardlink target
+use constant S_IFMT => 0170000; # type of file
+use constant S_IFDIR => 0040000; # directory
+use constant S_IFCHR => 0020000; # character special
+use constant S_IFBLK => 0060000; # block special
+use constant S_IFREG => 0100000; # regular
+use constant S_IFLNK => 0120000; # symbolic link
+use constant S_IFSOCK => 0140000; # socket
+use constant S_IFIFO => 0010000; # fifo
use vars qw( $RsyncLibOK );
my $fio = bless {
blockSize => 700,
logLevel => 0,
- digest => File::RsyncP::Digest->new,
+ digest => File::RsyncP::Digest->new(),
checksumSeed => 0,
attrib => {},
logHandler => \&logHandler,
%$options,
}, $class;
+ $fio->{digest}->protocol($fio->{protocol_version});
$fio->{shareM} = $fio->{bpc}->fileNameEltMangle($fio->{share});
$fio->{outDir} = "$fio->{xfer}{outDir}/new/";
$fio->{outDirSh} = "$fio->{outDir}/$fio->{shareM}/";
$fio->{backups});
$fio->{full} = $fio->{xfer}{type} eq "full" ? 1 : 0;
$fio->{newFilesFH} = $fio->{xfer}{newFilesFH};
+ $fio->{partialNum} = undef if ( !$fio->{full} );
return $fio;
}
+#
+# We publish our version to File::RsyncP. This is so File::RsyncP
+# can provide backward compatibility to older FileIO code.
+#
+# Versions:
+#
+# undef or 1: protocol version 26, no hardlinks
+# 2: protocol version 28, supports hardlinks
+#
+sub version
+{
# Always answers 2, the hardlink-capable entry in the version table
# in the comment block above.
+ return 2;
+}
+
sub blockSize
{
my($fio, $value) = @_;
return $fio->{blockSize};
}
#
# Get or set the rsync protocol version held in $fio->{protocol_version}.
#
# When $value is defined it is stored, and the same value is passed to
# the protocol() method of $fio->{digest} so the digest object tracks
# the new setting. The stored version is returned in either case.
#
+sub protocol_version
+{
+ my($fio, $value) = @_;
+
+ if ( defined($value) ) {
+ $fio->{protocol_version} = $value;
+ $fio->{digest}->protocol($fio->{protocol_version});
+ }
+ return $fio->{protocol_version};
+}
+
#
# Get or set the $fio->{preserve_hard_links} flag.
#
# A defined $value replaces the stored flag; an undefined (or omitted)
# $value leaves it untouched. The current flag is returned either way,
# so the flag cannot be reset to undef through this accessor.
#
+sub preserve_hard_links
+{
+ my($fio, $value) = @_;
+
+ $fio->{preserve_hard_links} = $value if ( defined($value) );
+ return $fio->{preserve_hard_links};
+}
+
#
# Install $sub as this object's log handler.
#
# The handler is stored in $fio->{logHandler} and is also passed to
# BackupPC::Xfer::RsyncDigest->logHandlerSet (a class-method call), so
# it is registered there as well.
#
sub logHandlerSet
{
my($fio, $sub) = @_;
$fio->{logHandler} = $sub;
+ BackupPC::Xfer::RsyncDigest->logHandlerSet($sub);
}
#
#
sub csumStart
{
# Summary of the new ("+") implementation:
#
# Arguments: $f is the file entry, $needMD4 is forwarded to
# digestStart, $defBlkSize is the default block size (falls back to
# $fio->{blockSize} when false), and $phase is the rsync phase.
#
# Returns -1 when the file's attributes do not describe a regular
# file or when digestStart reports an error; otherwise returns the
# block size handed back by digestStart. The digest handle is kept
# in $fio->{csum} for csumGet/csumEnd.
- my($fio, $f, $needMD4) = @_;
+ my($fio, $f, $needMD4, $defBlkSize, $phase) = @_;
- my $attr = $fio->attribGet($f);
+ $defBlkSize ||= $fio->{blockSize};
# The second argument asks attribGet to follow hardlink entries.
+ my $attr = $fio->attribGet($f, 1);
$fio->{file} = $f;
- $fio->csumEnd if ( defined($fio->{fh}) );
- return if ( $attr->{type} != BPC_FTYPE_FILE );
- if ( !defined($fio->{fh} = BackupPC::FileZIO->open($attr->{fullPath},
- 0,
- $attr->{compress})) ) {
- $fio->log("Can't open $attr->{fullPath} (name=$f->{name})");
# Finish any digest still open from a previous file before starting.
+ $fio->csumEnd if ( defined($fio->{csum}) );
+ return -1 if ( $attr->{type} != BPC_FTYPE_FILE );
+
+ #
+ # Rsync uses short checksums on the first phase. If the whole-file
+ # checksum fails, then the file is repeated with full checksums.
+ # So on phase 2 we verify the checksums if they are cached.
+ #
# The verification pass runs when $phase > 0, or otherwise when a
# random draw falls below $fio->{cacheCheckProb}; in both cases only
# for compressed files and only when the checksum seed equals
# RSYNC_CSUMSEED_CACHE.
+ if ( ($phase > 0 || rand(1) < $fio->{cacheCheckProb})
+ && $attr->{compress}
+ && $fio->{checksumSeed} == RSYNC_CSUMSEED_CACHE ) {
# NOTE(review): $err from this digestStart is never checked, and
# $d->isCached is called unconditionally on the next statement. If
# digestStart can fail without returning an object, that call would
# die -- confirm against RsyncDigest.
+ my($err, $d, $blkSize) = BackupPC::Xfer::RsyncDigest->digestStart(
+ $attr->{fullPath}, $attr->{size}, 0,
+ $defBlkSize, $fio->{checksumSeed},
+ 0, $attr->{compress}, 0,
+ $fio->{protocol_version});
+ my($isCached, $isInvalid) = $d->isCached;
+ if ( $fio->{logLevel} >= 5 ) {
+ $fio->log("$attr->{fullPath} verify; cached = $isCached,"
+ . " invalid = $isInvalid, phase = $phase");
+ }
# Only files that carry cached digests (valid or flagged invalid) are
# re-checked; digestAdd is called with its verify argument set to 1.
+ if ( $isCached || $isInvalid ) {
+ my $ret = BackupPC::Xfer::RsyncDigest->digestAdd(
+ $attr->{fullPath}, $blkSize,
+ $fio->{checksumSeed}, 1, # verify
+ $fio->{protocol_version}
+ );
# Any result other than 1 is logged as a bad cached digest and counted
# as an error.
+ if ( $ret != 1 ) {
+ $fio->log("Bad cached digest for $attr->{fullPath} ($ret);"
+ . " fixed");
+ $fio->{stats}{errorCnt}++;
+ } else {
+ $fio->log("$f->{name}: verified cached digest")
+ if ( $fio->{logLevel} >= 2 );
+ }
+ }
+ $d->digestEnd;
+ }
# Open the digest stream that csumGet/csumEnd will use; this time the
# caller's $needMD4 is passed through and the handle lands in
# $fio->{csum}.
+ (my $err, $fio->{csum}, my $blkSize)
+ = BackupPC::Xfer::RsyncDigest->digestStart($attr->{fullPath},
+ $attr->{size}, 0, $defBlkSize, $fio->{checksumSeed},
+ $needMD4, $attr->{compress}, 1, $fio->{protocol_version});
+ if ( $err ) {
+ $fio->log("Can't get rsync digests from $attr->{fullPath}"
+ . " (err=$err, name=$f->{name})");
$fio->{stats}{errorCnt}++;
return -1;
}
- if ( $needMD4) {
- $fio->{csumDigest} = File::RsyncP::Digest->new;
- $fio->{csumDigest}->add(pack("V", $fio->{checksumSeed}));
- } else {
- delete($fio->{csumDigest});
+ if ( $fio->{logLevel} >= 5 ) {
+ my($isCached, $invalid) = $fio->{csum}->isCached;
+ $fio->log("$attr->{fullPath} cache = $isCached,"
+ . " invalid = $invalid, phase = $phase");
}
+ return $blkSize;
}
sub csumGet
$num ||= 100;
$csumLen ||= 16;
-
- return if ( !defined($fio->{fh}) );
- if ( $fio->{fh}->read(\$fileData, $blockSize * $num) <= 0 ) {
- $fio->log("$fio->{file}{name}: csumGet is at EOF - zero padding");
- $fio->{stats}{errorCnt}++;
- $fileData = pack("c", 0) x ($blockSize * $num);
- }
- $fio->{csumDigest}->add($fileData) if ( defined($fio->{csumDigest}) );
- $fio->log(sprintf("%s: getting csum ($num,$csumLen,%d,0x%x)",
- $fio->{file}{name},
- length($fileData),
- $fio->{checksumSeed}))
- if ( $fio->{logLevel} >= 9 );
- return $fio->{digest}->blockDigest($fileData, $blockSize,
- $csumLen, $fio->{checksumSeed});
+ return if ( !defined($fio->{csum}) );
+ return $fio->{csum}->digestGet($num, $csumLen);
}
#
# Finish the digest stream opened by csumStart.
#
# In the new ("+") implementation this returns nothing when no digest
# handle is stored in $fio->{csum}; otherwise it returns whatever the
# handle's digestEnd() returns.
#
sub csumEnd
{
my($fio) = @_;
- return if ( !defined($fio->{fh}) );
- #
- # make sure we read the entire file for the file MD4 digest
- #
- if ( defined($fio->{csumDigest}) ) {
- my $fileData;
- while ( $fio->{fh}->read(\$fileData, 65536) > 0 ) {
- $fio->{csumDigest}->add($fileData);
- }
- }
- $fio->{fh}->close();
- delete($fio->{fh});
- return $fio->{csumDigest}->digest if ( defined($fio->{csumDigest}) );
+ return if ( !defined($fio->{csum}) );
# NOTE(review): unlike the removed code, which deleted $fio->{fh},
# $fio->{csum} stays set after digestEnd(). A later csumStart will
# therefore call csumEnd again on the same handle -- confirm that a
# repeated digestEnd() is harmless.
+ return $fio->{csum}->digestEnd();
}
sub readStart
{
my($fio, $f) = @_;
- my $attr = $fio->attribGet($f);
+ my $attr = $fio->attribGet($f, 1);
$fio->{file} = $f;
$fio->readEnd if ( defined($fio->{fh}) );
if ( !defined($fio->{fh} = BackupPC::FileZIO->open($attr->{fullPath},
my($fio, $checksumSeed) = @_;
$fio->{checksumSeed} = $checksumSeed;
+ $fio->log("Checksum caching enabled (checksumSeed = $checksumSeed)")
+ if ( $fio->{logLevel} >= 1 && $checksumSeed == RSYNC_CSUMSEED_CACHE );
+ $fio->log("Checksum seed is $checksumSeed")
+ if ( $fio->{logLevel} >= 2 && $checksumSeed != RSYNC_CSUMSEED_CACHE );
}
sub dirs
#
$fio->{viewCache}{$shareM}
= $fio->{view}->dirAttrib($fio->{viewNum}, $share, $dir);
+ #
+ # also cache partial backup attrib data too
+ #
+ if ( defined($fio->{partialNum}) ) {
+ foreach my $d ( keys(%{$fio->{partialCache}}) ) {
+ delete($fio->{partialCache}{$d}) if ( $shareM !~ m{^\Q$d/} );
+ }
+ $fio->{partialCache}{$shareM}
+ = $fio->{view}->dirAttrib($fio->{partialNum}, $share, $dir);
+ }
}
-sub attribGet
+sub attribGetWhere
{
- my($fio, $f) = @_;
- my($dir, $fname, $share, $shareM);
+ my($fio, $f, $noCache, $fname) = @_;
+ my($dir, $share, $shareM, $partial, $attr);
- $fname = $f->{name};
- $fname = "$fio->{xfer}{pathHdrSrc}/$fname"
+ if ( !defined($fname) ) {
+ $fname = $f->{name};
+ $fname = "$fio->{xfer}{pathHdrSrc}/$fname"
if ( defined($fio->{xfer}{pathHdrSrc}) );
+ }
$fname =~ s{//+}{/}g;
- if ( $fname =~ m{(.*)/(.*)} ) {
+ if ( $fname =~ m{(.*)/(.*)}s ) {
$shareM = $fio->{shareM};
$dir = $1;
$fname = $2;
$dir = "";
$fname = $fio->{share};
}
- $fio->viewCacheDir($share, $dir);
$shareM .= "/$dir" if ( $dir ne "" );
- return $fio->{viewCache}{$shareM}{$fname};
+
+ if ( $noCache ) {
+ $share = $fio->{share} if ( !defined($share) );
+ my $dirAttr = $fio->{view}->dirAttrib($fio->{viewNum}, $share, $dir);
+ $attr = $dirAttr->{$fname};
+ } else {
+ $fio->viewCacheDir($share, $dir);
+ if ( defined($attr = $fio->{viewCache}{$shareM}{$fname}) ) {
+ $partial = 0;
+ } elsif ( defined($attr = $fio->{partialCache}{$shareM}{$fname}) ) {
+ $partial = 1;
+ } else {
+ return;
+ }
+ if ( $attr->{mode} & S_HLINK_TARGET ) {
+ $attr->{hlink_self} = 1;
+ $attr->{mode} &= ~S_HLINK_TARGET;
+ }
+ }
+ return ($attr, $partial);
+}
+
#
# Look up the attributes for file entry $f via attribGetWhere and
# return the attribute hash (the partial flag is discarded).
#
# When $doHardLink is true and the entry's type is BPC_FTYPE_HARDLINK,
# the stored file is opened and read to obtain the link target, and
# the attributes of that target are fetched and returned in place of
# the hardlink entry's own attributes.
#
+sub attribGet
+{
+ my($fio, $f, $doHardLink) = @_;
+
+ my($attr) = $fio->attribGetWhere($f);
# NOTE(review): attribGetWhere has a bare "return" for entries it
# cannot find, so $attr may be undef here. With $doHardLink set, the
# dereference below would then turn $attr into an empty hash and this
# sub would return {} rather than undef -- confirm callers expect that.
+ if ( $doHardLink && $attr->{type} == BPC_FTYPE_HARDLINK ) {
+ $fio->log("$attr->{fullPath}: opening for hardlink read"
+ . " (name = $f->{name})") if ( $fio->{logLevel} >= 4 );
+ my $fh = BackupPC::FileZIO->open($attr->{fullPath}, 0,
+ $attr->{compress});
+ my $target;
+ if ( defined($fh) ) {
# At most 65536 bytes are read as the target path; an optional
# leading "." and any leading slashes are stripped from it.
+ $fh->read(\$target, 65536);
+ $fh->close;
+ $target =~ s/^\.?\/+//;
+ } else {
# On open failure the entry is relabelled as a plain file (the type
# field of the attribute hash is overwritten) and returned as-is.
+ $fio->log("$attr->{fullPath}: can't open for hardlink read");
+ $fio->{stats}{errorCnt}++;
+ $attr->{type} = BPC_FTYPE_FILE;
+ return $attr;
+ }
# The leading "/" is added only for the log message below and is
# stripped again before the lookup.
+ $target = "/$target" if ( $target !~ /^\// );
+ $fio->log("$attr->{fullPath}: redirecting to $target")
+ if ( $fio->{logLevel} >= 4 );
+ $target =~ s{^/+}{};
# Second lookup: $noCache = 1 and an explicit file name ($target).
+ ($attr) = $fio->attribGetWhere($f, 1, $target);
# NOTE(review): $attr is not checked for undef after the redirected
# lookup either -- confirm a dangling target cannot occur.
+ $fio->log(" ... now got $attr->{fullPath}")
+ if ( $fio->{logLevel} >= 4 );
+ }
+ return $attr;
}
sub mode2type
{
- my($fio, $mode) = @_;
+ my($fio, $f) = @_;
+ my $mode = $f->{mode};
if ( ($mode & S_IFMT) == S_IFREG ) {
- return BPC_FTYPE_FILE;
+ if ( defined($f->{hlink}) && !$f->{hlink_self} ) {
+ return BPC_FTYPE_HARDLINK;
+ } else {
+ return BPC_FTYPE_FILE;
+ }
} elsif ( ($mode & S_IFMT) == S_IFDIR ) {
return BPC_FTYPE_DIR;
} elsif ( ($mode & S_IFMT) == S_IFLNK ) {
my($fio, $f, $placeHolder) = @_;
my($dir, $file);
- if ( $f->{name} =~ m{(.*)/(.*)} ) {
+ return if ( $placeHolder && $fio->{phase} > 0 );
+
+ if ( $f->{name} =~ m{(.*)/(.*)}s ) {
$file = $2;
$dir = "$fio->{shareM}/" . $1;
} elsif ( $f->{name} eq "." ) {
$file = $f->{name};
}
- if ( !defined($fio->{attribLastDir}) || $fio->{attribLastDir} ne $dir ) {
+ if ( $dir ne ""
+ && (!defined($fio->{attribLastDir}) || $fio->{attribLastDir} ne $dir) ) {
#
# Flush any directories that don't match the first part
- # of the new directory
+ # of the new directory. Don't flush the top-level directory
+ # (ie: $dir eq "") since the "." might get sorted in the middle
+ # of other top-level directories or files.
#
foreach my $d ( keys(%{$fio->{attrib}}) ) {
next if ( $d eq "" || "$dir/" =~ m{^\Q$d/} );
$fio->{attribLastDir} = $dir;
}
if ( !exists($fio->{attrib}{$dir}) ) {
+ $fio->log("attribSet: dir=$dir not found") if ( $fio->{logLevel} >= 4 );
$fio->{attrib}{$dir} = BackupPC::Attrib->new({
compress => $fio->{xfer}{compress},
});
- my $path = $fio->{outDir} . $dir;
- if ( -f $fio->{attrib}{$dir}->fileName($path)
- && !$fio->{attrib}{$dir}->read($path) ) {
- $fio->log(sprintf("Unable to read attribute file %s",
+ my $dirM = $dir;
+ $dirM = $1 . "/" . $fio->{bpc}->fileNameMangle($2)
+ if ( $dirM =~ m{(.*?)/(.*)}s );
+ my $path = $fio->{outDir} . $dirM;
+ if ( -f $fio->{attrib}{$dir}->fileName($path) ) {
+ if ( !$fio->{attrib}{$dir}->read($path) ) {
+ $fio->log(sprintf("Unable to read attribute file %s",
$fio->{attrib}{$dir}->fileName($path)));
+ } else {
+ $fio->log(sprintf("attribRead file %s",
+ $fio->{attrib}{$dir}->fileName($path)))
+ if ( $fio->{logLevel} >= 4 );
+ }
}
+ } else {
+ $fio->log("attribSet: dir=$dir exists") if ( $fio->{logLevel} >= 4 );
}
- $fio->log("attribSet(dir=$dir, file=$file)") if ( $fio->{logLevel} >= 4 );
+ $fio->log("attribSet(dir=$dir, file=$file, size=$f->{size}, placeholder=$placeHolder)")
+ if ( $fio->{logLevel} >= 4 );
+
+ my $mode = $f->{mode};
+ $mode |= S_HLINK_TARGET if ( $f->{hlink_self} );
$fio->{attrib}{$dir}->set($file, {
- type => $fio->mode2type($f->{mode}),
- mode => $f->{mode},
+ type => $fio->mode2type($f),
+ mode => $mode,
uid => $f->{uid},
gid => $f->{gid},
size => $placeHolder ? -1 : $f->{size},
return;
}
return if ( !defined($fio->{attrib}{$d}) );
+
#
# Set deleted files in the attributes. Any file in the view
- # that doesn't have attributes is deleted. All files sent by
- # rsync have attributes temporarily set so we can do deletion
- # detection. We also prune these temporary attributes.
+ # that doesn't have attributes is flagged as deleted for
+ # incremental dumps. All files sent by rsync have attributes
+ # temporarily set so we can do deletion detection. We also
+ # prune these temporary attributes.
#
if ( $d ne "" ) {
my $dir;
my $share;
- $dir = $1 if ( $d =~ m{.+?/(.*)} );
+ $dir = $1 if ( $d =~ m{.+?/(.*)}s );
$fio->viewCacheDir(undef, $dir);
##print("attribWrite $d,$dir\n");
##$Data::Dumper::Indent = 1;
if ( defined($fio->{viewCache}{$d}) ) {
foreach my $f ( keys(%{$fio->{viewCache}{$d}}) ) {
my $name = $f;
- $name = "$1/$name" if ( $d =~ m{.*?/(.*)} );
+ $name = "$1/$name" if ( $d =~ m{.*?/(.*)}s );
if ( defined(my $a = $fio->{attrib}{$d}->get($f)) ) {
#
# delete temporary attributes (skipped files)
$fio->logFileAction("skip", {
%{$fio->{viewCache}{$d}{$f}},
name => $name,
- }) if ( $fio->{logLevel} >= 2 );
+ }) if ( $fio->{logLevel} >= 2
+ && $a->{type} == BPC_FTYPE_FILE );
}
- } else {
+ } elsif ( $fio->{phase} == 0 && !$fio->{full} ) {
##print("Delete file $f\n");
$fio->logFileAction("delete", {
%{$fio->{viewCache}{$d}{$f}},
}
}
}
- if ( $fio->{attrib}{$d}->fileCount ) {
+ if ( $fio->{attrib}{$d}->fileCount || $fio->{phase} > 0 ) {
my $data = $fio->{attrib}{$d}->writeData;
my $dirM = $d;
$dirM = $1 . "/" . $fio->{bpc}->fileNameMangle($2)
- if ( $dirM =~ m{(.*?)/(.*)} );
+ if ( $dirM =~ m{(.*?)/(.*)}s );
my $fileName = $fio->{attrib}{$d}->fileName("$fio->{outDir}$dirM");
$fio->log("attribWrite(dir=$d) -> $fileName")
if ( $fio->{logLevel} >= 4 );
sub makePath
{
my($fio, $f) = @_;
- my $name = $1 if ( $f->{name} =~ /(.*)/ );
+ my $name = $1 if ( $f->{name} =~ /(.*)/s );
my $path;
if ( $name eq "." ) {
}
$fio->logFileAction("create", $f) if ( $fio->{logLevel} >= 1 );
$fio->log("makePath($path, 0777)") if ( $fio->{logLevel} >= 5 );
- $path = $1 if ( $path =~ /(.*)/ );
+ $path = $1 if ( $path =~ /(.*)/s );
File::Path::mkpath($path, 0, 0777) if ( !-d $path );
return $fio->attribSet($f) if ( -d $path );
$fio->log("Can't create directory $path");
sub makeSpecial
{
my($fio, $f) = @_;
- my $name = $1 if ( $f->{name} =~ /(.*)/ );
+ my $name = $1 if ( $f->{name} =~ /(.*)/s );
my $fNameM = $fio->{bpc}->fileNameMangle($name);
my $path = $fio->{outDirSh} . $fNameM;
my $attr = $fio->attribGet($f);
my $str = "";
- my $type = $fio->mode2type($f->{mode});
+ my $type = $fio->mode2type($f);
$fio->log("makeSpecial($path, $type, $f->{mode})")
if ( $fio->{logLevel} >= 5 );
if ( $type == BPC_FTYPE_CHARDEV || $type == BPC_FTYPE_BLOCKDEV ) {
my($major, $minor, $fh, $fileData);
- $major = $f->{rdev} >> 8;
- $minor = $f->{rdev} & 0xff;
+ if ( defined($f->{rdev_major}) ) {
+ $major = $f->{rdev_major};
+ $minor = $f->{rdev_minor};
+ } else {
+ $major = $f->{rdev} >> 8;
+ $minor = $f->{rdev} & 0xff;
+ }
$str = "$major,$minor";
} elsif ( ($f->{mode} & S_IFMT) == S_IFLNK ) {
$str = $f->{link};
+ } elsif ( ($f->{mode} & S_IFMT) == S_IFREG ) {
+ #
+ # this is a hardlink
+ #
+ if ( !defined($f->{hlink}) ) {
+ $fio->log("Error: makeSpecial($path, $type, $f->{mode}) called"
+ . " on a regular non-hardlink file");
+ return 1;
+ }
+ $str = $f->{hlink};
}
#
# Now see if the file is different, or this is a full, in which
my($fh, $fileData);
if ( $fio->{full}
|| !defined($attr)
- || $attr->{type} != $fio->mode2type($f->{mode})
- || $attr->{mtime} != $f->{mtime}
- || $attr->{size} != $f->{size}
- || $attr->{uid} != $f->{uid}
- || $attr->{gid} != $f->{gid}
- || $attr->{mode} != $f->{mode}
+ || $attr->{type} != $type
+ || $attr->{mtime} != $f->{mtime}
+ || $attr->{size} != $f->{size}
+ || $attr->{uid} != $f->{uid}
+ || $attr->{gid} != $f->{gid}
+ || $attr->{mode} != $f->{mode}
+ || $attr->{hlink_self} != $f->{hlink_self}
|| !defined($fh = BackupPC::FileZIO->open($attr->{fullPath}, 0,
$attr->{compress}))
|| $fh->read(\$fileData, length($str) + 1) != length($str)
$fh->close if ( defined($fh) );
}
+#
+# Make a hardlink. Returns non-zero on error.
+# This actually gets called twice for each hardlink.
+# Once as the file list is processed, and again at
+# the end. BackupPC does them as it goes (since it is
+# just saving the hardlink info and not actually making
+# hardlinks).
+#
+sub makeHardLink
+{
+ my($fio, $f, $end) = @_;
+
# The second, end-of-run call ($end true) does nothing.
+ return if ( $end );
# Entries that are not themselves the link target are stored through
# makeSpecial, whose result is returned.
# NOTE(review): when $f->{hlink_self} is set there is no explicit
# return; the sub presumably relies on the implicit false result to
# mean "no error" -- confirm.
+ return $fio->makeSpecial($f) if ( !$f->{hlink_self} );
+}
+
sub unlink
{
my($fio, $path) = @_;
my $owner = "$f->{uid}/$f->{gid}";
my $type = (("", "p", "c", "", "d", "", "b", "", "", "", "l", "", "s"))
[($f->{mode} & S_IFMT) >> 12];
+ my $name = $f->{name};
+
+ if ( ($f->{mode} & S_IFMT) == S_IFLNK ) {
+ $name .= " -> $f->{link}";
+ } elsif ( ($f->{mode} & S_IFMT) == S_IFREG
+ && defined($f->{hlink}) && !$f->{hlink_self} ) {
+ $name .= " -> $f->{hlink}";
+ }
+ $name =~ s/\n/\\n/g;
$fio->log(sprintf(" %-6s %1s%4o %9s %11.0f %s",
$action,
$f->{mode} & 07777,
$owner,
$f->{size},
- $f->{name}));
+ $name));
}
#
-# Later we'll use this function to complete a prior unfinished dump.
-# We'll do an incremental on the part we have already, and then a
-# full or incremental against the rest.
+# If there is a partial and we are doing a full, we do an incremental
+# against the partial and a full against the rest. This subroutine
+# is how we tell File::RsyncP which files to ignore attributes on
+# (ie: against the partial dump we do consider the attributes, but
+# otherwise we ignore attributes).
#
sub ignoreAttrOnFile
{
- return undef;
+ my($fio, $f) = @_;
+
# Without a partial backup ($fio->{partialNum} undefined) nothing is
# returned.
+ return if ( !defined($fio->{partialNum}) );
# attribGetWhere's second return value is 1 when the attributes came
# from the partial cache and 0 when they came from the view cache.
+ my($attr, $isPartial) = $fio->attribGetWhere($f);
+ $fio->log("$f->{name}: just checking attributes from partial")
+ if ( $isPartial && $fio->{logLevel} >= 5 );
# True (ignore attributes) unless the file was found in the partial.
# A file found in neither cache leaves $isPartial undefined, so it
# also yields true.
+ return !$isPartial;
+}
+
+#
+# This is called by File::RsyncP when a file is skipped because the
+# attributes match.
+#
+sub attrSkippedFile
+{
# $f is the skipped file entry and $attr its existing attributes.
# Returns nothing when no partial backup is in use; otherwise returns
# the result of fileDeltaRxDone().
+ my($fio, $f, $attr) = @_;
+
+ #
+ # Unless this is a partial, this is normal so ignore it.
+ #
+ return if ( !defined($fio->{partialNum}) );
+
+ $fio->log("$f->{name}: skipped in partial; adding link")
+ if ( $fio->{logLevel} >= 5 );
# Set the receive state (rx* fields) by hand: existing attributes,
# file entry and size.
+ $fio->{rxLocalAttr} = $attr;
+ $fio->{rxFile} = $f;
+ $fio->{rxSize} = $attr->{size};
# Remove any input/output handles, running digest and cached input
# data from the receive state.
+ delete($fio->{rxInFd});
+ delete($fio->{rxOutFd});
+ delete($fio->{rxDigest});
+ delete($fio->{rxInData});
# Called with no arguments, so fileDeltaRxDone sees an undefined $md4
# (its MD4 check is guarded by defined($md4)) and an undefined $phase.
+ return $fio->fileDeltaRxDone();
}
#
. " ($fio->{rxFile}{size} vs $rxSize)")
if ( $fio->{logLevel} >= 5 );
}
+ #
+ # If compression was off and now on, or on and now off, then
+ # don't do an exact match.
+ #
+ if ( defined($fio->{rxLocalAttr})
+ && !$fio->{rxLocalAttr}{compress} != !$fio->{xfer}{compress} ) {
+ $fio->{rxMatchBlk} = undef; # compression changed, so no file match
+ $fio->log("$fio->{rxFile}{name}: compression changed, so no match"
+ . " ($fio->{rxLocalAttr}{compress} vs $fio->{xfer}{compress})")
+ if ( $fio->{logLevel} >= 4 );
+ }
+ #
+ # If the local file is a hardlink then no match
+ #
+ if ( defined($fio->{rxLocalAttr})
+ && $fio->{rxLocalAttr}{type} == BPC_FTYPE_HARDLINK ) {
+ $fio->{rxMatchBlk} = undef;
+ $fio->log("$fio->{rxFile}{name}: no match on hardlinks")
+ if ( $fio->{logLevel} >= 4 );
+ my $fCopy;
+ # need to copy since hardlink attribGet overwrites the name
+ %{$fCopy} = %$f;
+ $fio->{rxHLinkAttr} = $fio->attribGet($fCopy, 1); # hardlink attributes
+ } else {
+ delete($fio->{rxHLinkAttr});
+ }
delete($fio->{rxInFd});
delete($fio->{rxOutFd});
delete($fio->{rxDigest});
# need to open an output file where we will build the
# new version.
#
- $fio->{rxFile}{name} =~ /(.*)/;
+ $fio->{rxFile}{name} =~ /(.*)/s;
my $rxOutFileRel = "$fio->{shareM}/" . $fio->{bpc}->fileNameMangle($1);
my $rxOutFile = $fio->{outDir} . $rxOutFileRel;
$fio->{rxOutFd} = BackupPC::PoolWrite->new($fio->{bpc},
if ( $fio->{logLevel} >= 9 );
$fio->{rxOutFile} = $rxOutFile;
$fio->{rxOutFileRel} = $rxOutFileRel;
- $fio->{rxDigest} = File::RsyncP::Digest->new;
+ $fio->{rxDigest} = File::RsyncP::Digest->new();
+ $fio->{rxDigest}->protocol($fio->{protocol_version});
$fio->{rxDigest}->add(pack("V", $fio->{checksumSeed}));
}
if ( defined($fio->{rxMatchBlk})
# Need to copy the sequence of blocks that matched. If the file
# is compressed we need to make a copy of the uncompressed file,
# since the compressed file is not seekable. Future optimizations
- # would be to keep the uncompressed file in memory (eg, up to say
- # 10MB), only create an uncompressed copy if the matching
+ # could include only creating an uncompressed copy if the matching
# blocks were not monotonic, and to only do this if there are
# matching blocks (eg, maybe the entire file is new).
#
my $attr = $fio->{rxLocalAttr};
my $fh;
if ( !defined($fio->{rxInFd}) && !defined($fio->{rxInData}) ) {
+ my $inPath = $attr->{fullPath};
+ $inPath = $fio->{rxHLinkAttr}{fullPath}
+ if ( defined($fio->{rxHLinkAttr}) );
if ( $attr->{compress} ) {
if ( !defined($fh = BackupPC::FileZIO->open(
- $attr->{fullPath},
+ $inPath,
0,
$attr->{compress})) ) {
- $fio->log("Can't open $attr->{fullPath}");
+ $fio->log("Can't open $inPath");
$fio->{stats}{errorCnt}++;
return -1;
}
#
my $data;
$fio->{rxInData} = "";
- while ( $fh->read(\$data, 30 * 1024 * 1024) > 0 ) {
+ while ( $fh->read(\$data, 16 * 1024 * 1024) > 0 ) {
$fio->{rxInData} .= $data;
}
$fio->log("$attr->{fullPath}: cached all $attr->{size}"
if ( open(F, "+>", "$fio->{outDirSh}RStmp") ) {
my $data;
my $byteCnt = 0;
+ binmode(F);
while ( $fh->read(\$data, 1024 * 1024) > 0 ) {
if ( syswrite(F, $data) != length($data) ) {
$fio->log(sprintf("Can't write len=%d to %s",
}
$fh->close;
} else {
- if ( open(F, "<", $attr->{fullPath}) ) {
+ if ( open(F, "<", $inPath) ) {
+ binmode(F);
$fio->{rxInFd} = *F;
$fio->{rxInName} = $attr->{fullPath};
} else {
- $fio->log("Unable to open $attr->{fullPath}");
+ $fio->log("Unable to open $inPath");
$fio->{stats}{errorCnt}++;
return -1;
}
my $seekPosn = $fio->{rxMatchBlk} * $fio->{rxBlkSize};
if ( defined($fio->{rxInFd})
&& !sysseek($fio->{rxInFd}, $seekPosn, 0) ) {
- $fio->log("Unable to seek $attr->{rxInName} to $seekPosn");
+ $fio->log("Unable to seek $fio->{rxInName} to $seekPosn");
$fio->{stats}{errorCnt}++;
return -1;
}
#
sub fileDeltaRxDone
{
- my($fio, $md4) = @_;
- my $name = $1 if ( $fio->{rxFile}{name} =~ /(.*)/ );
+ my($fio, $md4, $phase) = @_;
+ my $name = $1 if ( $fio->{rxFile}{name} =~ /(.*)/s );
+ my $ret;
- if ( !defined($fio->{rxDigest}) ) {
- #
- # File was exact match, but we still need to verify the
- # MD4 checksum. Therefore open and read the file.
- #
- $fio->{rxDigest} = File::RsyncP::Digest->new;
- $fio->{rxDigest}->add(pack("V", $fio->{checksumSeed}));
- my $attr = $fio->{rxLocalAttr};
- if ( defined($attr) ) {
- if ( defined(my $fh = BackupPC::FileZIO->open(
- $attr->{fullPath},
- 0,
- $attr->{compress})) ) {
- my $data;
- while ( $fh->read(\$data, 4 * 65536) > 0 ) {
- $fio->{rxDigest}->add($data);
- $fio->{rxSize} += length($data);
- }
- $fh->close;
- } else {
- $fio->log("Can't open $attr->{fullPath} for MD4 check ($name)");
- $fio->{stats}{errorCnt}++;
- }
- }
- $fio->log("$name got exact match")
- if ( $fio->{logLevel} >= 5 );
- }
close($fio->{rxInFd}) if ( defined($fio->{rxInFd}) );
unlink("$fio->{outDirSh}RStmp") if ( -f "$fio->{outDirSh}RStmp" );
- my $newDigest = $fio->{rxDigest}->digest;
- if ( $fio->{logLevel} >= 3 ) {
- my $md4Str = unpack("H*", $md4);
- my $newStr = unpack("H*", $newDigest);
- $fio->log("$name got digests $md4Str vs $newStr")
- }
- if ( $md4 ne $newDigest ) {
- $fio->log("$name: fatal error: md4 doesn't match");
- $fio->{stats}{errorCnt}++;
- if ( defined($fio->{rxOutFd}) ) {
- $fio->{rxOutFd}->close;
- unlink($fio->{rxOutFile});
+ $fio->{phase} = $phase;
+
+ #
+ # Check the final md4 digest
+ #
+ if ( defined($md4) ) {
+ my $newDigest;
+ if ( !defined($fio->{rxDigest}) ) {
+ #
+ # File was exact match, but we still need to verify the
+ # MD4 checksum. Compute the md4 digest (or fetch the
+ # cached one.)
+ #
+ if ( defined(my $attr = $fio->{rxLocalAttr}) ) {
+ #
+ # block size doesn't matter: we're only going to
+ # fetch the md4 file digest, not the block digests.
+ #
+ my($err, $csum, $blkSize)
+ = BackupPC::Xfer::RsyncDigest->digestStart(
+ $attr->{fullPath}, $attr->{size},
+ 0, 2048, $fio->{checksumSeed}, 1,
+ $attr->{compress}, 1,
+ $fio->{protocol_version});
+ if ( $err ) {
+ $fio->log("Can't open $attr->{fullPath} for MD4"
+ . " check (err=$err, $name)");
+ $fio->{stats}{errorCnt}++;
+ } else {
+ if ( $fio->{logLevel} >= 5 ) {
+ my($isCached, $invalid) = $csum->isCached;
+ $fio->log("MD4 $attr->{fullPath} cache = $isCached,"
+ . " invalid = $invalid");
+ }
+ $newDigest = $csum->digestEnd;
+ }
+ $fio->{rxSize} = $attr->{size};
+ } else {
+ #
+ # Empty file; just create an empty file digest
+ #
+ $fio->{rxDigest} = File::RsyncP::Digest->new();
+ $fio->{rxDigest}->protocol($fio->{protocol_version});
+ $fio->{rxDigest}->add(pack("V", $fio->{checksumSeed}));
+ $newDigest = $fio->{rxDigest}->digest;
+ }
+ $fio->log("$name got exact match") if ( $fio->{logLevel} >= 5 );
+ } else {
+ $newDigest = $fio->{rxDigest}->digest;
+ }
+ if ( $fio->{logLevel} >= 3 ) {
+ my $md4Str = unpack("H*", $md4);
+ my $newStr = unpack("H*", $newDigest);
+ $fio->log("$name got digests $md4Str vs $newStr")
+ }
+ if ( $md4 ne $newDigest ) {
+ if ( $phase > 0 ) {
+ $fio->log("$name: fatal error: md4 doesn't match on retry;"
+ . " file removed");
+ } else {
+ $fio->log("$name: md4 doesn't match: will retry in phase 1;"
+ . " file removed");
+ }
+ $fio->{stats}{errorCnt}++;
+ if ( defined($fio->{rxOutFd}) ) {
+ $fio->{rxOutFd}->close;
+ unlink($fio->{rxOutFile});
+ }
+ delete($fio->{rxFile});
+ delete($fio->{rxOutFile});
+ return 1;
}
- return 1;
}
+
#
# One special case is an empty file: if the file size is
# zero we need to open the output file to create it.
my $f = $fio->{rxFile};
$fio->logFileAction("same", $f) if ( $fio->{logLevel} >= 1 );
if ( $fio->{full}
- || $attr->{type} != $f->{type}
- || $attr->{mtime} != $f->{mtime}
- || $attr->{size} != $f->{size}
- || $attr->{gid} != $f->{gid}
- || $attr->{mode} != $f->{mode} ) {
+ || $attr->{type} != $f->{type}
+ || $attr->{mtime} != $f->{mtime}
+ || $attr->{size} != $f->{size}
+ || $attr->{uid} != $f->{uid}
+ || $attr->{gid} != $f->{gid}
+ || $attr->{mode} != $f->{mode}
+ || $attr->{hlink_self} != $f->{hlink_self} ) {
#
# In the full case, or if the attributes are different,
# we need to make a link from the previous file and
#
my $rxOutFile = $fio->{outDirSh}
. $fio->{bpc}->fileNameMangle($name);
- if ( !link($attr->{fullPath}, $rxOutFile) ) {
- $fio->log("Unable to link $attr->{fullPath} to $rxOutFile");
- $fio->{stats}{errorCnt}++;
- return -1;
+ my($exists, $digest, $origSize, $outSize, $errs)
+ = BackupPC::PoolWrite::LinkOrCopy(
+ $fio->{bpc},
+ $attr->{fullPath},
+ $attr->{compress},
+ $rxOutFile,
+ $fio->{xfer}{compress});
+ #
+ # Cumulate the stats
+ #
+ $fio->{stats}{TotalFileCnt}++;
+ $fio->{stats}{TotalFileSize} += $fio->{rxSize};
+ $fio->{stats}{ExistFileCnt}++;
+ $fio->{stats}{ExistFileSize} += $fio->{rxSize};
+ $fio->{stats}{ExistFileCompSize} += -s $rxOutFile;
+ $fio->{rxFile}{size} = $fio->{rxSize};
+ $ret = $fio->attribSet($fio->{rxFile});
+ $fio->log(@$errs) if ( defined($errs) && @$errs );
+
+ if ( !$exists && $outSize > 0 ) {
+ #
+ # the hard link failed, most likely because the target
+ # file has too many links. We have copied the file
+ # instead, so add this to the new file list.
+ #
+ my $rxOutFileRel = "$fio->{shareM}/"
+ . $fio->{bpc}->fileNameMangle($name);
+ $rxOutFileRel =~ s{^/+}{};
+ my $fh = $fio->{newFilesFH};
+ print($fh "$digest $origSize $rxOutFileRel\n")
+ if ( defined($fh) );
}
- #
- # Cumulate the stats
- #
- $fio->{stats}{TotalFileCnt}++;
- $fio->{stats}{TotalFileSize} += $fio->{rxSize};
- $fio->{stats}{ExistFileCnt}++;
- $fio->{stats}{ExistFileSize} += $fio->{rxSize};
- $fio->{stats}{ExistFileCompSize} += -s $rxOutFile;
- $fio->{rxFile}{size} = $fio->{rxSize};
- return $fio->attribSet($fio->{rxFile});
}
- }
- if ( defined($fio->{rxOutFd}) ) {
+ } else {
my $exist = $fio->processClose($fio->{rxOutFd},
$fio->{rxOutFileRel},
$fio->{rxSize}, 1);
$fio->logFileAction($exist ? "pool" : "create", $fio->{rxFile})
if ( $fio->{logLevel} >= 1 );
$fio->{rxFile}{size} = $fio->{rxSize};
- return $fio->attribSet($fio->{rxFile});
+ $ret = $fio->attribSet($fio->{rxFile});
}
delete($fio->{rxDigest});
delete($fio->{rxInData});
- return;
+ delete($fio->{rxFile});
+ delete($fio->{rxOutFile});
+ return $ret;
}
#
my($a, $fio, $fList, $outputFunc) = @_;
my $name = $a->{relPath};
my $n = $name;
- my $type = $fio->mode2type($a->{mode});
+ my $type = $a->{type};
my $extraAttribs = {};
+ if ( $a->{mode} & S_HLINK_TARGET ) {
+ $a->{hlink_self} = 1;
+ $a->{mode} &= ~S_HLINK_TARGET;
+ }
$n =~ s/^\Q$fio->{xfer}{pathHdrSrc}//;
- $fio->log("Sending $name (remote=$n)") if ( $fio->{logLevel} >= 4 );
+ $fio->log("Sending $name (remote=$n) type = $type") if ( $fio->{logLevel} >= 1 );
if ( $type == BPC_FTYPE_CHARDEV
|| $type == BPC_FTYPE_BLOCKDEV
|| $type == BPC_FTYPE_SYMLINK ) {
my $fh = BackupPC::FileZIO->open($a->{fullPath}, 0, $a->{compress});
- my $str;
+ my($str, $rdSize);
if ( defined($fh) ) {
- if ( $fh->read(\$str, $a->{size} + 1) == $a->{size} ) {
- if ( $type == BPC_FTYPE_SYMLINK ) {
- #
- # Reconstruct symbolic link
- #
- $extraAttribs = { link => $str };
- } elsif ( $str =~ /(\d*),(\d*)/ ) {
- #
- # Reconstruct char or block special major/minor device num
- #
- $extraAttribs = { rdev => $1 * 256 + $2 };
- } else {
- $fio->log("$name: unexpected special file contents $str");
+ $rdSize = $fh->read(\$str, $a->{size} + 1024);
+ if ( $type == BPC_FTYPE_SYMLINK ) {
+ #
+ # Reconstruct symbolic link
+ #
+ $extraAttribs = { link => $str };
+ if ( $rdSize != $a->{size} ) {
+ # ERROR
+ $fio->log("$name: can't read exactly $a->{size} bytes");
$fio->{stats}{errorCnt}++;
}
+ } elsif ( $str =~ /(\d*),(\d*)/ ) {
+ #
+ # Reconstruct char or block special major/minor device num
+ #
+ # Note: char/block devices have $a->{size} = 0, so we
+ # can't do an error check on $rdSize.
+ #
+ $extraAttribs = {
+ rdev => $1 * 256 + $2,
+ rdev_major => $1,
+ rdev_minor => $2,
+ };
} else {
- # ERROR
- $fio->log("$name: can't read exactly $a->{size} bytes");
+ $fio->log("$name: unexpected special file contents $str");
$fio->{stats}{errorCnt}++;
}
$fh->close;
$fio->log("$name: can't open");
$fio->{stats}{errorCnt}++;
}
+ } elsif ( $fio->{preserve_hard_links}
+ && ($type == BPC_FTYPE_HARDLINK || $type == BPC_FTYPE_FILE)
+ && ($type == BPC_FTYPE_HARDLINK
+ || $fio->{protocol_version} < 27
+ || $a->{hlink_self}) ) {
+ #
+ # Fill in fake inode information so that the remote rsync
+ # can correctly create hardlinks.
+ #
+ $name =~ s/^\.?\/+//;
+ my($target, $inode);
+
+ if ( $type == BPC_FTYPE_HARDLINK ) {
+ my $fh = BackupPC::FileZIO->open($a->{fullPath}, 0,
+ $a->{compress});
+ if ( defined($fh) ) {
+ $fh->read(\$target, 65536);
+ $fh->close;
+ $target =~ s/^\.?\/+//;
+ if ( defined($fio->{hlinkFile2Num}{$target}) ) {
+ $inode = $fio->{hlinkFile2Num}{$target};
+ } else {
+ $inode = $fio->{fileListCnt};
+ $fio->{hlinkFile2Num}{$target} = $inode;
+ }
+ } else {
+ $fio->log("$a->{fullPath}: can't open for hardlink");
+ $fio->{stats}{errorCnt}++;
+ }
+ } elsif ( $a->{hlink_self} ) {
+ if ( defined($fio->{hlinkFile2Num}{$name}) ) {
+ $inode = $fio->{hlinkFile2Num}{$name};
+ } else {
+ $inode = $fio->{fileListCnt};
+ $fio->{hlinkFile2Num}{$name} = $inode;
+ }
+ }
+ $inode = $fio->{fileListCnt} if ( !defined($inode) );
+ $fio->log("$name: setting inode to $inode");
+ $extraAttribs = {
+ %$extraAttribs,
+ dev => 0,
+ inode => $inode,
+ };
}
my $f = {
- name => $n,
- #dev => 0, # later, when we support hardlinks
- #inode => 0, # later, when we support hardlinks
- mode => $a->{mode},
- uid => $a->{uid},
- gid => $a->{gid},
- mtime => $a->{mtime},
- size => $a->{size},
- %$extraAttribs,
+ name => $n,
+ mode => $a->{mode} & ~S_HLINK_TARGET,
+ uid => $a->{uid},
+ gid => $a->{gid},
+ mtime => $a->{mtime},
+ size => $a->{size},
+ %$extraAttribs,
};
+ my $logName = $f->{name};
+ from_to($f->{name}, "utf8", $fio->{clientCharset})
+ if ( $fio->{clientCharset} ne "" );
$fList->encode($f);
- $f->{name} = "$fio->{xfer}{pathHdrDest}/$f->{name}";
- $f->{name} =~ s{//+}{/}g;
+
+ $logName = "$fio->{xfer}{pathHdrDest}/$logName";
+ $logName =~ s{//+}{/}g;
+ $f->{name} = $logName;
$fio->logFileAction("restore", $f) if ( $fio->{logLevel} >= 1 );
+
&$outputFunc($fList->encodeData);
#
# Cumulate stats
#
+ $fio->{fileListCnt}++;
if ( $type != BPC_FTYPE_DIR ) {
$fio->{stats}{TotalFileCnt}++;
$fio->{stats}{TotalFileSize} += $a->{size};
#
$fio->log("fileListSend: sending file list: "
. join(" ", @{$fio->{fileList}})) if ( $fio->{logLevel} >= 4 );
+ $fio->{fileListCnt} = 0;
+ $fio->{hlinkFile2Num} = {};
foreach my $name ( @{$fio->{fileList}} ) {
$fio->{view}->find($fio->{xfer}{bkupSrcNum},
$fio->{xfer}{bkupSrcShare},
{
my($fio, $isChild) = @_;
+ #
+ # If we are aborting early, remove the last file since
+ # it was not complete
+ #
+ if ( $isChild && defined($fio->{rxFile}) ) {
+ unlink("$fio->{outDirSh}RStmp") if ( -f "$fio->{outDirSh}RStmp" );
+ if ( defined($fio->{rxFile}) ) {
+ unlink($fio->{rxOutFile});
+ $fio->log("finish: removing in-process file $fio->{rxFile}{name}");
+ }
+ }
+
#
# Flush the attributes if this is the child
#
- $fio->attribWrite(undef);
+ $fio->attribWrite(undef) if ( $isChild );
}
#sub is_tainted