# Craig Barratt <cbarratt@users.sourceforge.net>
#
# COPYRIGHT
-# Copyright (C) 2001 Craig Barratt
+# Copyright (C) 2001-2009 Craig Barratt
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
#
#========================================================================
#
-# Version 2.0.0_CVS, released 18 Jan 2003.
+# Version 3.2.0, released 31 Jul 2010.
#
# See http://backuppc.sourceforge.net.
#
eof => undef,
}, $class;
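+ #
+ # Remember the configured hardlink limit; it is used below to skip
+ # pool files that already have too many links.
+ #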
+ $self->{hardLinkMax} = $bpc->ConfValue("HardLinkMax");
+
#
# Always unlink any current file in case it is already linked
#
unlink($fileName) if ( -f $fileName );
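+ #
+ # Make sure the parent directory of $fileName exists
+ #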
+ if ( $fileName =~ m{(.*)/.+} && !-d $1 ) {
+ my $newDir = $1;
+ eval { mkpath($newDir, 0, 0777) };
+ if ( $@ ) {
+ push(@{$self->{errors}}, "Unable to create directory $newDir for $self->{fileName}");
+ }
+ }
return $self;
}
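+
+#
+# Typical usage (a sketch; $bpc is a BackupPC::Lib object and $in is
+# an illustrative read handle, neither is part of this module):
+#
+#    my $poolWrite = BackupPC::PoolWrite->new($bpc, $fileName,
+#                                             $fileSize, $compress);
+#    while ( sysread($in, my $data, 1048576) > 0 ) {
+#        $poolWrite->write(\$data);    # pass each chunk by scalar ref
+#    }
+#    my($exists, $digest, $outSize, $errs) = $poolWrite->close;
+#    # $exists is true if the file matched (and was linked to) an
+#    # existing pool file; @$errs accumulates any error strings.
+#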
-my $BufSize = 1048576; # 1MB or 2^20
-my $MaxFiles = 20;
+my $BufSize = 1048576; # 1MB or 2^20
+my $MaxFiles = 20; # max number of compare files open at one time
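+
+#
+# Accept a chunk of data for the file being written.  $dataRef is a
+# scalar ref to the data; an undef $dataRef means EOF.  Data is
+# buffered until at least $BufSize bytes arrive, at which point the
+# pool digest is computed and candidate pool files are opened for
+# comparison.  At EOF the file is either hardlinked to a matching
+# pool file or written out, and the close() result tuple
+# ($exists, $digest, $outSize, $errors) is returned.
+#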
sub write
{
return if ( $a->{eof} );
$a->{data} .= $$dataRef if ( defined($dataRef) );
return if ( length($a->{data}) < $BufSize && defined($dataRef) );
- if ( !defined($a->{digest}) && $a->{fileSize} > 0 ) {
+
+ #
+ # Correct the fileSize if it is wrong (rsync might transfer
+ # a file whose length is different from the length sent with
+ # the file list, if the file changes between the sending of
+ # the file list and the sending of the file itself). Here we
+ # only catch the case where we haven't computed the digest
+ # (i.e., we have written no more than $BufSize). The big-file
+ # case is caught below.
+ #
+ if ( !defined($dataRef) && !defined($a->{digest})
+ && $a->{fileSize} != length($a->{data}) ) {
+ #my $newSize = length($a->{data});
+ #print("Fixing file size from $a->{fileSize} to $newSize\n");
+ $a->{fileSize} = length($a->{data});
+ }
+
+ if ( !defined($a->{digest}) && length($a->{data}) > 0 ) {
#
# build a list of all the candidate matching files
#
my $md5 = Digest::MD5->new;
+ $a->{fileSize} = length($a->{data})
+ if ( $a->{fileSize} < length($a->{data}) );
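+ #
+ # Buffer2MD5 (in BackupPC::Lib) derives the pool digest from the
+ # file length plus the buffered data, which is why fileSize is
+ # corrected first.
+ #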
$a->{digest} = $a->{bpc}->Buffer2MD5($md5, $a->{fileSize}, \$a->{data});
if ( !defined($a->{base} = $a->{bpc}->MD52Path($a->{digest},
$a->{compress})) ) {
push(@{$a->{errors}}, "Unable to get path from '$a->{digest}'"
- . " for $a->{fileName}\n");
+ . " for $a->{fileName}");
} else {
while ( @{$a->{files}} < $MaxFiles ) {
my $fh;
my $fileName = $a->{fileCnt} < 0 ? $a->{base}
: "$a->{base}_$a->{fileCnt}";
last if ( !-f $fileName );
- if ( !defined($fh = BackupPC::FileZIO->open($fileName, 0,
+ #
+ # Don't attempt to match pool files that already
+ # have too many hardlinks. Also, don't match pool
+ # files with only one link since, starting in
+ # BackupPC v3.0, BackupPC_nightly could be running
+ # in parallel (and removing those files). This doesn't
+ # eliminate all possible race conditions, but it
+ # reduces the odds. Other design steps eliminate
+ # the remaining race conditions of linking vs
+ # removing.
+ #
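+ # ((stat(_))[3] is the link count; the _ filehandle reuses the
+ # stat results from the -f test above.)
+ #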
+ if ( (stat(_))[3] >= $a->{hardLinkMax}
+ || (stat(_))[3] <= 1
+ || !defined($fh = BackupPC::FileZIO->open($fileName, 0,
$a->{compress})) ) {
$a->{fileCnt}++;
next;
1, $a->{compress});
if ( !defined($a->{fhOut}) ) {
push(@{$a->{errors}}, "Unable to open $a->{fileName}"
- . " for writing\n");
+ . " for writing");
}
}
}
my $dataLen = length($a->{data});
- if ( !defined($a->{fhOut}) && $a->{fileSize} > 0 ) {
+ if ( !defined($a->{fhOut}) && length($a->{data}) > 0 ) {
#
# See if the new chunk of data continues to match the
# candidate files.
#
while ( -f $fileName ) {
my $fh;
- if ( !defined($fh = BackupPC::FileZIO->open($fileName, 0,
+ if ( (stat(_))[3] >= $a->{hardLinkMax}
+ || !defined($fh = BackupPC::FileZIO->open($fileName, 0,
$a->{compress})) ) {
$a->{fileCnt}++;
#print(" Discarding $fileName (open failed)\n");
if ( !$a->{files}[$i]->{fh}->rewind() ) {
push(@{$a->{errors}},
"Unable to rewind $a->{files}[$i]->{name}"
- . " for compare\n");
+ . " for compare");
}
$match = $a->filePartialCompare($a->{files}[$i]->{fh}, $fh,
$a->{nWrite}, $dataLen, \$a->{data});
if ( !defined($a->{fhOut}) ) {
push(@{$a->{errors}},
"Unable to open $a->{fileName}"
- . " for writing\n");
+ . " for writing");
} else {
if ( !$a->{files}[$i]->{fh}->rewind() ) {
push(@{$a->{errors}},
"Unable to rewind"
- . " $a->{files}[$i]->{name} for copy\n");
+ . " $a->{files}[$i]->{name} for copy");
}
$a->filePartialCopy($a->{files}[$i]->{fh}, $a->{fhOut},
$a->{nWrite});
my $n = $a->{fhOut}->write(\$a->{data});
if ( $n != $dataLen ) {
push(@{$a->{errors}}, "Unable to write $dataLen bytes to"
- . " $a->{fileName} (got $n)\n");
+ . " $a->{fileName} (got $n)");
}
}
$a->{nWrite} += $dataLen;
# We are at EOF, so finish up
#
$a->{eof} = 1;
- foreach my $f ( @{$a->{files}} ) {
- $f->{fh}->close();
+
+ #
+ # Make sure the fileSize was correct. See above for comments about
+ # rsync.
+ #
+ if ( $a->{nWrite} != $a->{fileSize} ) {
+ #
+ # Oops, fileSize was wrong, so our MD5 digest was wrong and our
+ # effort to match files likely failed. This is ugly, but our
+ # only choice at this point is to re-write the entire file with
+ # the correct length: rename the file, open it for reading, and
+ # then re-write it.
+ #
+
+ #print("Doing big file fixup ($a->{fileSize} != $a->{nWrite})\n");
+
+ my($fh, $fileName);
+ $a->{fileSize} = $a->{nWrite};
+
+ if ( defined($a->{fhOut}) ) {
+ if ( $a->{fileName} =~ /(.*)\// ) {
+ $fileName = $1;
+ } else {
+ $fileName = ".";
+ }
+ #
+ # Find a unique target temporary file name
+ #
+ my $i = 0;
+ while ( -f "$fileName/t$$.$i" ) {
+ $i++;
+ }
+ $fileName = "$fileName/t$$.$i";
+ $a->{fhOut}->close();
+ if ( !rename($a->{fileName}, $fileName)
+ || !defined($fh = BackupPC::FileZIO->open($fileName, 0,
+ $a->{compress})) ) {
+ push(@{$a->{errors}}, "Can't rename $a->{fileName} -> $fileName"
+ . " or open during size fixup");
+ }
+ #print("Using temporary name $fileName\n");
+ } elsif ( defined($a->{files}) && defined($a->{files}[0]) ) {
+ #
+ # We haven't written anything yet, so just use the
+ # compare file to copy from.
+ #
+ $fh = $a->{files}[0]->{fh};
+ $fh->rewind;
+ #print("Using compare file $a->{files}[0]->{name}\n");
+ }
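+ #
+ # Stream the data through a brand-new PoolWrite object so the
+ # digest and pool matching are redone with the corrected size.
+ #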
+ if ( defined($fh) ) {
+ my $poolWrite = BackupPC::PoolWrite->new($a->{bpc}, $a->{fileName},
+ $a->{fileSize}, $a->{compress});
+ my $nRead = 0;
+
+ while ( $nRead < $a->{fileSize} ) {
+ my $thisRead = $a->{fileSize} - $nRead < $BufSize
+ ? $a->{fileSize} - $nRead : $BufSize;
+ my $data;
+ my $n = $fh->read(\$data, $thisRead);
+ if ( $n != $thisRead ) {
+ push(@{$a->{errors}},
+ "Unable to read $thisRead bytes during resize"
+ . " from temp $fileName (got $n)");
+ last;
+ }
+ $poolWrite->write(\$data);
+ $nRead += $thisRead;
+ }
+ $fh->close;
+ unlink($fileName) if ( defined($fileName) );
+ if ( @{$a->{errors}} ) {
+ $poolWrite->close;
+ return (0, $a->{digest}, -s $a->{fileName}, $a->{errors});
+ } else {
+ return $poolWrite->close;
+ }
+ }
}
+
if ( $a->{fileSize} == 0 ) {
#
# Simply create an empty file
local(*OUT);
if ( !open(OUT, ">", $a->{fileName}) ) {
push(@{$a->{errors}}, "Can't open $a->{fileName} for empty"
- . " output\n");
+ . " output");
} else {
close(OUT);
}
+ #
+ # Close the compare files
+ #
+ foreach my $f ( @{$a->{files}} ) {
+ $f->{fh}->close();
+ }
return (1, $a->{digest}, -s $a->{fileName}, $a->{errors});
} elsif ( defined($a->{fhOut}) ) {
$a->{fhOut}->close();
+ #
+ # Close the compare files
+ #
+ foreach my $f ( @{$a->{files}} ) {
+ $f->{fh}->close();
+ }
return (0, $a->{digest}, -s $a->{fileName}, $a->{errors});
} else {
if ( @{$a->{files}} == 0 ) {
push(@{$a->{errors}}, "Botch, no matches on $a->{fileName}"
- . " ($a->{digest})\n");
+ . " ($a->{digest})");
} elsif ( @{$a->{files}} > 1 ) {
- my $str = "Unexpected multiple matches on"
- . " $a->{fileName} ($a->{digest})\n";
- for ( my $i = 0 ; $i < @{$a->{files}} ; $i++ ) {
- $str .= " -> $a->{files}[$i]->{name}\n";
+ #
+ # This is no longer a real error because $Conf{HardLinkMax}
+ # could be hit, thereby creating identical pool files
+ #
+ #my $str = "Unexpected multiple matches on"
+ # . " $a->{fileName} ($a->{digest})\n";
+ #for ( my $i = 0 ; $i < @{$a->{files}} ; $i++ ) {
+ # $str .= " -> $a->{files}[$i]->{name}\n";
+ #}
+ #push(@{$a->{errors}}, $str);
+ }
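+ #
+ # Try to hardlink the new file to each matching pool file in
+ # turn; the first link that succeeds wins.
+ #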
+ for ( my $i = 0 ; $i < @{$a->{files}} ; $i++ ) {
+ if ( link($a->{files}[$i]->{name}, $a->{fileName}) ) {
+ #print(" Linked $a->{fileName} to $a->{files}[$i]->{name}\n");
+ #
+ # Close the compare files
+ #
+ foreach my $f ( @{$a->{files}} ) {
+ $f->{fh}->close();
+ }
+ return (1, $a->{digest}, -s $a->{fileName}, $a->{errors});
+ }
+ }
+ #
+ # We were unable to link to the pool. Either we're at the
+ # hardlink max, or the pool file got deleted. Recover by
+ # writing the matching file, since we still have an open
+ # handle.
+ #
+ for ( my $i = 0 ; $i < @{$a->{files}} ; $i++ ) {
+ if ( !$a->{files}[$i]->{fh}->rewind() ) {
+ push(@{$a->{errors}},
+ "Unable to rewind $a->{files}[$i]->{name}"
+ . " for copy after link fail");
+ next;
}
- push(@{$a->{errors}}, $str);
+ $a->{fhOut} = BackupPC::FileZIO->open($a->{fileName},
+ 1, $a->{compress});
+ if ( !defined($a->{fhOut}) ) {
+ push(@{$a->{errors}},
+ "Unable to open $a->{fileName}"
+ . " for writing after link fail");
+ } else {
+ $a->filePartialCopy($a->{files}[$i]->{fh}, $a->{fhOut},
+ $a->{nWrite});
+ $a->{fhOut}->close;
+ }
+ last;
}
- #print(" Linking $a->{fileName} to $a->{files}[0]->{name}\n");
- if ( @{$a->{files}} && !link($a->{files}[0]->{name}, $a->{fileName}) ) {
- push(@{$a->{errors}}, "Can't link $a->{fileName} to"
- . " $a->{files}[0]->{name}\n");
+ #
+ # Close the compare files
+ #
+ foreach my $f ( @{$a->{files}} ) {
+ $f->{fh}->close();
}
- return (1, $a->{digest}, -s $a->{fileName}, $a->{errors});
+ return (0, $a->{digest}, -s $a->{fileName}, $a->{errors});
}
}
return $a->write(undef);
}
+#
+# Abort a pool write
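+# (close and remove any partial output file, and close the
+# compare files)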
+#
+sub abort
+{
+ my($a) = @_;
+
+ if ( defined($a->{fhOut}) ) {
+ $a->{fhOut}->close();
+ unlink($a->{fileName});
+ }
+ foreach my $f ( @{$a->{files}} ) {
+ $f->{fh}->close();
+ }
+ $a->{files} = [];
+}
+
#
# Copy $nBytes from files $fhIn to $fhOut.
#
if ( $n != $thisRead ) {
push(@{$a->{errors}},
"Unable to read $thisRead bytes from "
- . $fhIn->name . " (got $n)\n");
+ . $fhIn->name . " (got $n)");
return;
}
$n = $fhOut->write(\$data, $thisRead);
if ( $n != $thisRead ) {
push(@{$a->{errors}},
"Unable to write $thisRead bytes to "
- . $fhOut->name . " (got $n)\n");
+ . $fhOut->name . " (got $n)");
return;
}
$nRead += $thisRead;
$n = $fh0->read(\$data0, $thisRead);
if ( $n != $thisRead ) {
push(@{$a->{errors}}, "Unable to read $thisRead bytes from "
- . $fh0->name . " (got $n)\n");
+ . $fh0->name . " (got $n)");
return;
}
$n = $fh1->read(\$data1, $thisRead);
return 1;
}
+#
+# LinkOrCopy() does a hardlink from oldFile to newFile.
+#
+# If that fails (because there are too many links on oldFile)
+# then oldFile is copied to newFile, and the pool stats are
+# returned to be added to the new file list. That allows
+# BackupPC_link to try again, and to create a new pool file
+# if necessary.
+#
+sub LinkOrCopy
+{
+ my($bpc, $oldFile, $oldFileComp, $newFile, $newFileComp) = @_;
+ my($nRead, $data) = (0, undef);    # $nRead must start at 0 (the file may be empty)
+
+ unlink($newFile) if ( -f $newFile );
+ #
+ # Try to link if the hardlink limit is ok and the compression
+ # types are the same
+ #
+ return (1, undef) if ( (stat($oldFile))[3] < $bpc->{Conf}{HardLinkMax}
+ && !$oldFileComp == !$newFileComp
+ && link($oldFile, $newFile) );
+ #
+ # There are too many links on oldFile, or the compression
+ # types differ, so now we have to copy it.
+ #
+ # We need to compute the file size, which is expensive
+ # since we need to read the file twice. That's probably
+ # ok since the hardlink limit is rarely hit.
+ #
+ my $readFd = BackupPC::FileZIO->open($oldFile, 0, $oldFileComp);
+ if ( !defined($readFd) ) {
+ return (0, undef, undef, undef, ["LinkOrCopy: can't open $oldFile"]);
+ }
+ while ( $readFd->read(\$data, $BufSize) > 0 ) {
+ $nRead += length($data);
+ }
+ $readFd->rewind();
+
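+ #
+ # Stream the file through PoolWrite, which copies it at the new
+ # compression setting and matches/links it against the pool.
+ #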
+ my $poolWrite = BackupPC::PoolWrite->new($bpc, $newFile,
+ $nRead, $newFileComp);
+ while ( $readFd->read(\$data, $BufSize) > 0 ) {
+ $poolWrite->write(\$data);
+ }
+ my($exists, $digest, $outSize, $errs) = $poolWrite->close;
+
+ return ($exists, $digest, $nRead, $outSize, $errs);
+}
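+
+#
+# Usage sketch (variable names are illustrative):
+#
+#    my($exists, $digest, $size, $outSize, $errs)
+#        = BackupPC::PoolWrite::LinkOrCopy($bpc, $oldPath, $oldCompress,
+#                                          $newPath, $newCompress);
+#
+# A successful hardlink returns just (1, undef); the full five-value
+# list is returned when the file has to be copied.
+#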
+
1;