X-Git-Url: http://git.rot13.org/?p=BackupPC.git;a=blobdiff_plain;f=lib%2FBackupPC%2FPoolWrite.pm;h=23d67d38d57a49acd21033a4c5f5e20a151fcc78;hp=1522051f57a08801623e8461ed311073466aa717;hb=f6fbcc3682d2bc9e7dfdc26e95bd5fcdb359496d;hpb=1ce7d1541ea1279aaa0a75c16986a3fd40b608ec diff --git a/lib/BackupPC/PoolWrite.pm b/lib/BackupPC/PoolWrite.pm index 1522051..23d67d3 100644 --- a/lib/BackupPC/PoolWrite.pm +++ b/lib/BackupPC/PoolWrite.pm @@ -38,7 +38,7 @@ # Craig Barratt # # COPYRIGHT -# Copyright (C) 2001 Craig Barratt +# Copyright (C) 2001-2007 Craig Barratt # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ # #======================================================================== # -# Version 1.5.0, released 2 Aug 2002. +# Version 3.2.0beta0, released 5 April 2009. # # See http://backuppc.sourceforge.net. # @@ -89,15 +89,23 @@ sub new eof => undef, }, $class; + $self->{hardLinkMax} = $bpc->ConfValue("HardLinkMax"); + # # Always unlink any current file in case it is already linked # unlink($fileName) if ( -f $fileName ); + if ( $fileName =~ m{(.*)/.+} && !-d $1 ) { + eval { mkpath($1, 0, 0777) }; + if ( $@ ) { + push(@{$self->{errors}}, "Unable to create directory $1 for $self->{fileName}"); + } + } return $self; } -my $BufSize = 1048576; # 1MB or 2^20 -my $MaxFiles = 20; +my $BufSize = 1048576; # 1MB or 2^20 +my $MaxFiles = 20; # max number of compare files open at one time sub write { @@ -106,23 +114,54 @@ sub write return if ( $a->{eof} ); $a->{data} .= $$dataRef if ( defined($dataRef) ); return if ( length($a->{data}) < $BufSize && defined($dataRef) ); - if ( !defined($a->{digest}) && $a->{fileSize} > 0 ) { + + # + # Correct the fileSize if it is wrong (rsync might transfer + # a file whose length is different to the length sent with the + # file list if the file changes between the file list sending + # and the file sending). Here we only catch the case where + # we haven't computed the digest (ie: we have written no more + # than $BufSize). We catch the big file case below. + # + if ( !defined($dataRef) && !defined($a->{digest}) + && $a->{fileSize} != length($a->{data}) ) { + #my $newSize = length($a->{data}); + #print("Fixing file size from $a->{fileSize} to $newSize\n"); + $a->{fileSize} = length($a->{data}); + } + + if ( !defined($a->{digest}) && length($a->{data}) > 0 ) { # # build a list of all the candidate matching files # my $md5 = Digest::MD5->new; + $a->{fileSize} = length($a->{data}) + if ( $a->{fileSize} < length($a->{data}) ); $a->{digest} = $a->{bpc}->Buffer2MD5($md5, $a->{fileSize}, \$a->{data}); if ( !defined($a->{base} = $a->{bpc}->MD52Path($a->{digest}, $a->{compress})) ) { push(@{$a->{errors}}, "Unable to get path from '$a->{digest}'" - . " for $a->{fileName}\n"); + . " for $a->{fileName}"); } else { while ( @{$a->{files}} < $MaxFiles ) { my $fh; my $fileName = $a->{fileCnt} < 0 ? $a->{base} : "$a->{base}_$a->{fileCnt}"; last if ( !-f $fileName ); - if ( !defined($fh = BackupPC::FileZIO->open($fileName, 0, + # + # Don't attempt to match pool files that already + # have too many hardlinks. Also, don't match pool + # files with only one link since starting in + # BackupPC v3.0, BackupPC_nightly could be running + # in parallel (and removing those files). This doesn't + # eliminate all possible race conditions, but just + # reduces the odds. Other design steps eliminate + # the remaining race conditions of linking vs + # removing. 
+ # + if ( (stat(_))[3] >= $a->{hardLinkMax} + || (stat(_))[3] <= 1 + || !defined($fh = BackupPC::FileZIO->open($fileName, 0, $a->{compress})) ) { $a->{fileCnt}++; next; @@ -143,12 +182,12 @@ sub write 1, $a->{compress}); if ( !defined($a->{fhOut}) ) { push(@{$a->{errors}}, "Unable to open $a->{fileName}" - . " for writing\n"); + . " for writing"); } } } my $dataLen = length($a->{data}); - if ( !defined($a->{fhOut}) && $a->{fileSize} > 0 ) { + if ( !defined($a->{fhOut}) && length($a->{data}) > 0 ) { # # See if the new chunk of data continues to match the # candidate files. @@ -176,7 +215,8 @@ sub write # while ( -f $fileName ) { my $fh; - if ( !defined($fh = BackupPC::FileZIO->open($fileName, 0, + if ( (stat(_))[3] >= $a->{hardLinkMax} + || !defined($fh = BackupPC::FileZIO->open($fileName, 0, $a->{compress})) ) { $a->{fileCnt}++; #print(" Discarding $fileName (open failed)\n"); @@ -186,7 +226,7 @@ sub write if ( !$a->{files}[$i]->{fh}->rewind() ) { push(@{$a->{errors}}, "Unable to rewind $a->{files}[$i]->{name}" - . " for compare\n"); + . " for compare"); } $match = $a->filePartialCompare($a->{files}[$i]->{fh}, $fh, $a->{nWrite}, $dataLen, \$a->{data}); @@ -215,12 +255,12 @@ sub write if ( !defined($a->{fhOut}) ) { push(@{$a->{errors}}, "Unable to open $a->{fileName}" - . " for writing\n"); + . " for writing"); } else { if ( !$a->{files}[$i]->{fh}->rewind() ) { push(@{$a->{errors}}, "Unable to rewind" - . " $a->{files}[$i]->{name} for copy\n"); + . " $a->{files}[$i]->{name} for copy"); } $a->filePartialCopy($a->{files}[$i]->{fh}, $a->{fhOut}, $a->{nWrite}); @@ -239,7 +279,7 @@ sub write my $n = $a->{fhOut}->write(\$a->{data}); if ( $n != $dataLen ) { push(@{$a->{errors}}, "Unable to write $dataLen bytes to" - . " $a->{fileName} (got $n)\n"); + . " $a->{fileName} (got $n)"); } } $a->{nWrite} += $dataLen; @@ -250,42 +290,174 @@ sub write # We are at EOF, so finish up # $a->{eof} = 1; - foreach my $f ( @{$a->{files}} ) { - $f->{fh}->close(); + + # + # Make sure the fileSize was correct. See above for comments about + # rsync. + # + if ( $a->{nWrite} != $a->{fileSize} ) { + # + # Oops, fileSize was wrong, so our MD5 digest was wrong and our + # effort to match files likely failed. This is ugly, but our + # only choice at this point is to re-write the entire file with + # the correct length. We need to rename the file, open it for + # reading, and then re-write the file with the correct length. + # + + #print("Doing big file fixup ($a->{fileSize} != $a->{nWrite})\n"); + + my($fh, $fileName); + $a->{fileSize} = $a->{nWrite}; + + if ( defined($a->{fhOut}) ) { + if ( $a->{fileName} =~ /(.*)\// ) { + $fileName = $1; + } else { + $fileName = "."; + } + # + # Find a unique target temporary file name + # + my $i = 0; + while ( -f "$fileName/t$$.$i" ) { + $i++; + } + $fileName = "$fileName/t$$.$i"; + $a->{fhOut}->close(); + if ( !rename($a->{fileName}, $fileName) + || !defined($fh = BackupPC::FileZIO->open($fileName, 0, + $a->{compress})) ) { + push(@{$a->{errors}}, "Can't rename $a->{fileName} -> $fileName" + . " or open during size fixup"); + } + #print("Using temporary name $fileName\n"); + } elsif ( defined($a->{files}) && defined($a->{files}[0]) ) { + # + # We haven't written anything yet, so just use the + # compare file to copy from. 
+ # + $fh = $a->{files}[0]->{fh}; + $fh->rewind; + #print("Using compare file $a->{files}[0]->{name}\n"); + } + if ( defined($fh) ) { + my $poolWrite = BackupPC::PoolWrite->new($a->{bpc}, $a->{fileName}, + $a->{fileSize}, $a->{compress}); + my $nRead = 0; + + while ( $nRead < $a->{fileSize} ) { + my $thisRead = $a->{fileSize} - $nRead < $BufSize + ? $a->{fileSize} - $nRead : $BufSize; + my $data; + my $n = $fh->read(\$data, $thisRead); + if ( $n != $thisRead ) { + push(@{$a->{errors}}, + "Unable to read $thisRead bytes during resize" + . " from temp $fileName (got $n)"); + last; + } + $poolWrite->write(\$data); + $nRead += $thisRead; + } + $fh->close; + unlink($fileName) if ( defined($fileName) ); + if ( @{$a->{errors}} ) { + $poolWrite->close; + return (0, $a->{digest}, -s $a->{fileName}, $a->{errors}); + } else { + return $poolWrite->close; + } + } } + if ( $a->{fileSize} == 0 ) { # # Simply create an empty file # local(*OUT); - if ( !open(OUT, ">$a->{fileName}") ) { + if ( !open(OUT, ">", $a->{fileName}) ) { push(@{$a->{errors}}, "Can't open $a->{fileName} for empty" - . " output\n"); + . " output"); } else { close(OUT); } + # + # Close the compare files + # + foreach my $f ( @{$a->{files}} ) { + $f->{fh}->close(); + } return (1, $a->{digest}, -s $a->{fileName}, $a->{errors}); } elsif ( defined($a->{fhOut}) ) { $a->{fhOut}->close(); + # + # Close the compare files + # + foreach my $f ( @{$a->{files}} ) { + $f->{fh}->close(); + } return (0, $a->{digest}, -s $a->{fileName}, $a->{errors}); } else { if ( @{$a->{files}} == 0 ) { push(@{$a->{errors}}, "Botch, no matches on $a->{fileName}" - . " ($a->{digest})\n"); + . " ($a->{digest})"); } elsif ( @{$a->{files}} > 1 ) { - my $str = "Unexpected multiple matches on" - . " $a->{fileName} ($a->{digest})\n"; - for ( my $i = 0 ; $i < @{$a->{files}} ; $i++ ) { - $str .= " -> $a->{files}[$i]->{name}\n"; + # + # This is no longer a real error because $Conf{HardLinkMax} + # could be hit, thereby creating identical pool files + # + #my $str = "Unexpected multiple matches on" + # . " $a->{fileName} ($a->{digest})\n"; + #for ( my $i = 0 ; $i < @{$a->{files}} ; $i++ ) { + # $str .= " -> $a->{files}[$i]->{name}\n"; + #} + #push(@{$a->{errors}}, $str); + } + for ( my $i = 0 ; $i < @{$a->{files}} ; $i++ ) { + if ( link($a->{files}[$i]->{name}, $a->{fileName}) ) { + #print(" Linked $a->{fileName} to $a->{files}[$i]->{name}\n"); + # + # Close the compare files + # + foreach my $f ( @{$a->{files}} ) { + $f->{fh}->close(); + } + return (1, $a->{digest}, -s $a->{fileName}, $a->{errors}); + } + } + # + # We were unable to link to the pool. Either we're at the + # hardlink max, or the pool file got deleted. Recover by + # writing the matching file, since we still have an open + # handle. + # + for ( my $i = 0 ; $i < @{$a->{files}} ; $i++ ) { + if ( !$a->{files}[$i]->{fh}->rewind() ) { + push(@{$a->{errors}}, + "Unable to rewind $a->{files}[$i]->{name}" + . " for copy after link fail"); + next; } - push(@{$a->{errors}}, $str); + $a->{fhOut} = BackupPC::FileZIO->open($a->{fileName}, + 1, $a->{compress}); + if ( !defined($a->{fhOut}) ) { + push(@{$a->{errors}}, + "Unable to open $a->{fileName}" + . " for writing after link fail"); + } else { + $a->filePartialCopy($a->{files}[$i]->{fh}, $a->{fhOut}, + $a->{nWrite}); + $a->{fhOut}->close; + } + last; } - #print(" Linking $a->{fileName} to $a->{files}[0]->{name}\n"); - if ( @{$a->{files}} && !link($a->{files}[0]->{name}, $a->{fileName}) ) { - push(@{$a->{errors}}, "Can't link $a->{fileName} to" - . 
" $a->{files}[0]->{name}\n"); + # + # Close the compare files + # + foreach my $f ( @{$a->{files}} ) { + $f->{fh}->close(); } - return (1, $a->{digest}, -s $a->{fileName}, $a->{errors}); + return (0, $a->{digest}, -s $a->{fileName}, $a->{errors}); } } @@ -302,6 +474,23 @@ sub close return $a->write(undef); } +# +# Abort a pool write +# +sub abort +{ + my($a) = @_; + + if ( defined($a->{fhOut}) ) { + $a->{fhOut}->close(); + unlink($a->{fileName}); + } + foreach my $f ( @{$a->{files}} ) { + $f->{fh}->close(); + } + $a->{files} = []; +} + # # Copy $nBytes from files $fhIn to $fhOut. # @@ -318,14 +507,14 @@ sub filePartialCopy if ( $n != $thisRead ) { push(@{$a->{errors}}, "Unable to read $thisRead bytes from " - . $fhIn->name . " (got $n)\n"); + . $fhIn->name . " (got $n)"); return; } $n = $fhOut->write(\$data, $thisRead); if ( $n != $thisRead ) { push(@{$a->{errors}}, "Unable to write $thisRead bytes to " - . $fhOut->name . " (got $n)\n"); + . $fhOut->name . " (got $n)"); return; } $nRead += $thisRead; @@ -348,7 +537,7 @@ sub filePartialCompare $n = $fh0->read(\$data0, $thisRead); if ( $n != $thisRead ) { push(@{$a->{errors}}, "Unable to read $thisRead bytes from " - . $fh0->name . " (got $n)\n"); + . $fh0->name . " (got $n)"); return; } $n = $fh1->read(\$data1, $thisRead); @@ -367,4 +556,53 @@ sub filePartialCompare return 1; } +# +# LinkOrCopy() does a hardlink from oldFile to newFile. +# +# If that fails (because there are too many links on oldFile) +# then oldFile is copied to newFile, and the pool stats are +# returned to be added to the new file list. That allows +# BackupPC_link to try again, and to create a new pool file +# if necessary. +# +sub LinkOrCopy +{ + my($bpc, $oldFile, $oldFileComp, $newFile, $newFileComp) = @_; + my($nRead, $data); + + unlink($newFile) if ( -f $newFile ); + # + # Try to link if hardlink limit is ok, and compression types + # are the same + # + return (1, undef) if ( (stat($oldFile))[3] < $bpc->{Conf}{HardLinkMax} + && !$oldFileComp == !$newFileComp + && link($oldFile, $newFile) ); + # + # There are too many links on oldFile, or compression + # type if different, so now we have to copy it. + # + # We need to compute the file size, which is expensive + # since we need to read the file twice. That's probably + # ok since the hardlink limit is rarely hit. + # + my $readFd = BackupPC::FileZIO->open($oldFile, 0, $oldFileComp); + if ( !defined($readFd) ) { + return (0, undef, undef, undef, ["LinkOrCopy: can't open $oldFile"]); + } + while ( $readFd->read(\$data, $BufSize) > 0 ) { + $nRead += length($data); + } + $readFd->rewind(); + + my $poolWrite = BackupPC::PoolWrite->new($bpc, $newFile, + $nRead, $newFileComp); + while ( $readFd->read(\$data, $BufSize) > 0 ) { + $poolWrite->write(\$data); + } + my($exists, $digest, $outSize, $errs) = $poolWrite->close; + + return ($exists, $digest, $nRead, $outSize, $errs); +} + 1;