1 #============================================================= -*-perl-*-
3 # BackupPC::Xfer::RsyncDigest package
7 # This library defines a BackupPC::Xfer::RsyncDigest class for computing
8 # and caching rsync checksums.
11 # Craig Barratt <cbarratt@users.sourceforge.net>
14 # Copyright (C) 2001-2009 Craig Barratt
16 # This program is free software; you can redistribute it and/or modify
17 # it under the terms of the GNU General Public License as published by
18 # the Free Software Foundation; either version 2 of the License, or
19 # (at your option) any later version.
21 # This program is distributed in the hope that it will be useful,
22 # but WITHOUT ANY WARRANTY; without even the implied warranty of
23 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 # GNU General Public License for more details.
26 # You should have received a copy of the GNU General Public License
27 # along with this program; if not, write to the Free Software
28 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #========================================================================
32 # Version 3.2.0, released 31 Jul 2010.
34 # See http://backuppc.sourceforge.net.
36 #========================================================================
38 package BackupPC::Xfer::RsyncDigest;
41 use BackupPC::FileZIO;
43 use vars qw( $RsyncLibOK );
47 use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS );
49 my $Log = \&logHandler;
52 # Magic value for checksum seed. We only cache block and file digests
53 # when the checksum seed matches this value.
55 use constant RSYNC_CSUMSEED_CACHE => 32761;
66 'all' => [ @EXPORT_OK ],
70 eval "use File::RsyncP;";
73 # File::RsyncP doesn't exist. Define some dummy constant
74 # subs so that the code below doesn't barf.
83 # Return the rsync block size based on the file size.
84 # We also make sure the block size plus 4 (ie: the checksumSeed)
85 # is not a multiple of 64 - otherwise the cached checksums
86 # will not be the same for protocol versions <= 26 and > 26.
#
# Arguments: $fileSize is the file size the block size is derived from;
# $defaultBlkSize is the smallest block size that will be chosen.
90 my($class, $fileSize, $defaultBlkSize) = @_;
# Start from 1/10000th of the file size, truncated to an integer ...
92 my $blkSize = int($fileSize / 10000);
# ... but never go below the caller's default ...
93 $blkSize = $defaultBlkSize if ( $blkSize < $defaultBlkSize );
# ... and never above 16384.
94 $blkSize = 16384 if ( $blkSize > 16384 );
# Step the size up by 4 when blkSize + 4 would land on a multiple of 64.
95 $blkSize += 4 if ( (($blkSize + 4) % 64) == 0 );
# Class method: report whether $file has cached rsync checksums by looking
# at its first byte only (0xd7 is the value digestAdd writes at offset 0
# once it has appended checksum data).
#
# Returns 1 if the first byte is 0xd7 and 0 if it is anything else.
# Returns -1 if the file cannot be opened and -2 if one byte cannot be read.
99 sub fileDigestIsCached
101 my($class, $file) = @_;
104 sysopen(my $fh, $file, O_RDONLY) || return -1;
106 return -2 if ( sysread($fh, $data, 1) != 1 );
108 return $data eq chr(0xd7) ? 1 : 0;
112 # Compute and add rsync block and file digests to the given file.
114 # Empty files don't get cached checksums.
116 # If verify is set then existing cached checksums are checked.
117 # If verify == 2 then only a verify is done; no fixes are applied.
119 # Returns 0 on success. Returns 1 on good verify and 2 on bad verify.
120 # Returns a variety of negative values on error.
124 my($class, $file, $blockSize, $checksumSeed, $verify,
125 $protocol_version) = @_;
129 # Don't cache checksums if the checksumSeed is not RSYNC_CSUMSEED_CACHE
130 # or if the file is empty.
132 return -100 if ( $checksumSeed != RSYNC_CSUMSEED_CACHE || !-s $file );
134 if ( $blockSize == 0 ) {
135 &$Log("digestAdd: bad blockSize ($file, $blockSize, $checksumSeed)");
# Blocks requested per read: enough whole blocks to cover 65536 * 16
# (presumably bytes - see the read of $nBlks * $blockSize below).
138 my $nBlks = int(65536 * 16 / $blockSize) + 1;
139 my($data, $blockDigest, $fileDigest);
141 return -101 if ( !$RsyncLibOK );
143 my $digest = File::RsyncP::Digest->new;
144 $digest->protocol($protocol_version)
145 if ( defined($protocol_version) );
# Feed the checksum seed, packed as a 32-bit little-endian word, into the
# digest before any file data.
146 $digest->add(pack("V", $checksumSeed)) if ( $checksumSeed );
148 return -102 if ( !defined(my $fh = BackupPC::FileZIO->open($file, 0, 1)) );
152 $fh->read(\$data, $nBlks * $blockSize);
153 $fileSize += length($data);
154 last if ( $data eq "" );
155 $blockDigest .= $digest->blockDigest($data, $blockSize, 16,
159 $fileDigest = $digest->digest2;
# Current position of the underlying file handle; the checksum record is
# verified or written starting at this offset (see the seeks to $eofPosn).
160 my $eofPosn = sysseek($fh->{fh}, 0, 1);
# Assemble the record: block digests followed by the file digest, then four
# 32-bit metadata words; $data2 prefixes all of it with a 0xb3 byte.
# length($blockDigest) / 20 is the block count (20 bytes per block entry).
162 my $rsyncData = $blockDigest . $fileDigest;
163 my $metaData = pack("VVVV", $blockSize,
165 length($blockDigest) / 20,
166 0x5fe3c289, # magic number
168 my $data2 = chr(0xb3) . $rsyncData . $metaData;
169 # printf("appending %d+%d bytes to %s at offset %d\n",
170 # length($rsyncData),
174 sysopen(my $fh2, $file, O_RDWR) || return -103;
176 return -104 if ( sysread($fh2, $data, 1) != 1 );
# Only 0x78, 0xd6 and 0xd7 are expected as the first byte of the file.
177 if ( $data ne chr(0x78) && $data ne chr(0xd6) && $data ne chr(0xd7) ) {
178 &$Log(sprintf("digestAdd: $file has unexpected first char 0x%x",
182 return -106 if ( sysseek($fh2, $eofPosn, 0) != $eofPosn );
187 # Verify the cached checksums
189 return -107 if ( $data ne chr(0xd7) );
# One byte more than length($data2) is requested, so any extra trailing
# byte ends up in $data3 and makes the eq comparison below fail.
# NOTE(review): sysread returns undef (not a negative number) on error, so
# the "< 0" test does not appear to catch a failed read - confirm.
190 return -108 if ( sysread($fh2, $data3, length($data2) + 1) < 0 );
191 if ( $data2 eq $data3 ) {
195 # Checksums don't agree - fall through so we rewrite the data
197 &$Log(sprintf("digestAdd: %s verify failed; redoing checksums; len = %d,%d; eofPosn = %d, fileSize = %d",
198 $file, length($data2), length($data3), $eofPosn, $fileSize));
199 #&$Log(sprintf("dataNew = %s", unpack("H*", $data2)));
200 #&$Log(sprintf("dataFile = %s", unpack("H*", $data3)));
201 return -109 if ( sysseek($fh2, $eofPosn, 0) != $eofPosn );
# Verify-only mode stops here, before anything is written.
203 return $retValue if ( $verify == 2 );
205 return -110 if ( syswrite($fh2, $data2) != length($data2) );
208 # Make sure there is no extraneous data on the end of
209 # the file. Seek to the end and truncate if it doesn't
210 # match our expected length.
212 return -111 if ( !defined(sysseek($fh2, 0, 2)) );
213 if ( sysseek($fh2, 0, 1) != $eofPosn + length($data2) ) {
214 if ( !truncate($fh2, $eofPosn + length($data2)) ) {
215 &$Log(sprintf("digestAdd: $file truncate from %d to %d failed",
216 sysseek($fh2, 0, 1), $eofPosn + length($data2)));
219 &$Log(sprintf("digestAdd: %s truncated from %d to %d",
221 sysseek($fh2, 0, 1), $eofPosn + length($data2)));
# Last step: rewind and overwrite the first byte of the file with 0xd7.
225 return -113 if ( !defined(sysseek($fh2, 0, 0)) );
226 return -114 if ( syswrite($fh2, chr(0xd7)) != 1 );
232 # Return rsync checksums for the given file. We read the cached checksums
233 # if they exist and the block size and checksum seed match. Otherwise
234 # we compute the checksums from the file contents.
236 # The doCache flag can take three ranges:
238 # - doCache < 0: don't generate/use cached checksums
239 # - doCache == 0: don't generate, but do use cached checksums if available
240 # - doCache > 0: generate (if necessary) and use cached checksums
242 # Note: caching is only enabled when compression is on and the
243 # checksum seed is RSYNC_CSUMSEED_CACHE (32761).
245 # On success returns the list (undef, $dg, $dg->{blockSize}), where $dg is
# the digest state built here. On error returns a single negative value.
249 my($class, $fileName, $fileSize, $blockSize, $defBlkSize,
250 $checksumSeed, $needMD4, $compress, $doCache, $protocol_version) = @_;
252 return -1 if ( !$RsyncLibOK );
259 digest => File::RsyncP::Digest->new,
260 protocol_version => $protocol_version,
263 $dg->{digest}->protocol($dg->{protocol_version})
264 if ( defined($dg->{protocol_version}) );
# Cached checksums are only considered for non-empty files when $compress
# is set and $doCache is not negative.
266 if ( $fileSize > 0 && $compress && $doCache >= 0 ) {
267 open(my $fh, "<", $fileName) || return -2;
269 return -3 if ( sysread($fh, $data, 4096) < 1 );
# A first byte of 0x78 or 0xd6, together with doCache > 0 and the caching
# seed, means the checksums get generated now via digestAdd.
272 if ( (vec($data, 0, 8) == 0x78 || vec($data, 0, 8) == 0xd6) && $doCache > 0
273 && $checksumSeed == RSYNC_CSUMSEED_CACHE ) {
275 # RSYNC_CSUMSEED_CACHE (32761) is the magic number that
276 # rsync uses for checksumSeed with the --fixed-csum option.
278 # We now add the cached checksum data to the file. There
279 # is a possible race condition here since two BackupPC_dump
280 # processes might call this function at the same time
281 # on the same file. But this should be ok since both
282 # processes will write the same data, and the order
283 # in which they write it doesn't matter.
286 $ret = $dg->digestAdd($fileName,
288 || BackupPC::Xfer::RsyncDigest->blockSize(
289 $fileSize, $defBlkSize),
290 $checksumSeed, 0, $dg->{protocol_version});
292 &$Log("digestAdd($fileName) failed ($ret)");
295 # now re-open the file and re-read the first byte
297 open($fh, "<", $fileName) || return -4;
299 return -5 if ( read($fh, $data, 1) != 1 );
301 if ( $ret >= 0 && vec($data, 0, 8) == 0xd7 ) {
303 # Looks like this file has cached checksums
304 # Read the last 48 bytes: that's 2 file MD4s (32 bytes)
305 # plus 4 words of meta data
# When the buffer holds a full 4096 bytes, replace it with the last 4096
# bytes of the file; otherwise the buffer is used as it is.
308 if ( length($data) >= 4096 ) {
309 return -6 if ( !defined(sysseek($fh, -4096, 2)) );
310 return -7 if ( sysread($fh, $data, 4096) != 4096 );
# The trailer is two 16-byte strings followed by four 32-bit little-endian
# words (see the unpack template below).
312 $cacheInfo = substr($data, -48);
313 ($dg->{md4DigestOld},
318 $dg->{magic}) = unpack("a16 a16 V V V V", $cacheInfo);
# The cache is accepted only if the magic number and checksum seed match
# and the stored block size is acceptable (any size when $blockSize == 0).
319 if ( $dg->{magic} == 0x5fe3c289
320 && $dg->{checksumSeed} == $checksumSeed
321 && ($blockSize == 0 || $dg->{blockSize} == $blockSize) ) {
324 if ( length($data) >= $dg->{nBlocks} * 20 + 48 ) {
326 # We have all the data already - just remember it
328 $dg->{digestData} = substr($data,
329 length($data) - $dg->{nBlocks} * 20 - 48,
330 $dg->{nBlocks} * 20);
333 # position the file at the start of the rsync block checksums
334 # (4 (adler) + 16 (md4) bytes each)
337 if ( !defined(sysseek($fh, -$dg->{nBlocks} * 20 - 48, 2)) );
341 # cached checksums are not valid, so we close the
342 # file and treat it as uncached.
344 $dg->{cachedInvalid} = 1;
349 if ( !$dg->{cached} ) {
351 # This file doesn't have cached checksums, or the checksumSeed
352 # or blocksize doesn't match. Open the file and prepare to
353 # compute the checksums.
356 = BackupPC::Xfer::RsyncDigest->blockSize($fileSize, $defBlkSize)
357 if ( $blockSize == 0 );
358 $dg->{checksumSeed} = $checksumSeed;
359 $dg->{blockSize} = $blockSize;
360 $dg->{fh} = BackupPC::FileZIO->open($fileName, 0, $compress);
361 return -9 if ( !defined($dg->{fh}) );
# Separate digest object for the whole-file checksum, primed with the
# checksum seed packed as a 32-bit little-endian word.
363 $dg->{csumDigest} = File::RsyncP::Digest->new;
364 $dg->{csumDigest}->protocol($dg->{protocol_version})
365 if ( defined($dg->{protocol_version}) );
366 $dg->{csumDigest}->add(pack("V", $dg->{checksumSeed}));
369 return (undef, $dg, $dg->{blockSize});
# Return block digests for up to $num blocks, with $csumLen passed through
# to the digest library.
#
# Cached case: 20-byte-per-block entries are taken from $dg->{digestData}
# if it is set, otherwise read from $dg->{fh}, and handed to
# blockDigestExtract. Uncached case: $blockSize * $num is requested from
# the file and digested with blockDigest; the data is also added to the
# whole-file digest when $dg->{needMD4} is set.
#
# On a shortfall the data is padded with zero bytes unless $noPad is true.
374 my($dg, $num, $csumLen, $noPad) = @_;
376 my $blockSize = $dg->{blockSize};
378 if ( $dg->{cached} ) {
# Never hand out more blocks than the cache still holds.
380 $thisNum = $dg->{nBlocks} if ( $thisNum > $dg->{nBlocks} );
381 if ( defined($dg->{digestData}) ) {
382 $fileData = substr($dg->{digestData}, 0, 20 * $thisNum);
383 $dg->{digestData} = substr($dg->{digestData}, 20 * $thisNum);
385 sysread($dg->{fh}, $fileData, 20 * $thisNum);
387 $dg->{nBlocks} -= $thisNum;
388 if ( $thisNum < $num && !$noPad) {
390 # unexpected shortfall of data; pad with zero digest
392 $fileData .= pack("c", 0) x (20 * ($num - $thisNum));
394 return $dg->{digest}->blockDigestExtract($fileData, $csumLen);
396 if ( $dg->{fh}->read(\$fileData, $blockSize * $num) <= 0 ) {
398 # unexpected shortfall of data; pad with zeros
400 $fileData = pack("c", 0) x ($blockSize * $num) if ( !$noPad );
402 $dg->{csumDigest}->add($fileData) if ( $dg->{needMD4} );
403 return $dg->{digest}->blockDigest($fileData, $blockSize,
404 $csumLen, $dg->{checksumSeed});
# Finish the digest and return the whole-file MD4 when $dg->{needMD4} is set.
#
# Cached case: the stored digest is returned - md4DigestOld for
# protocol_version <= 26, md4Digest otherwise (presumably the else branch;
# confirm against the full source).
# Uncached case: unless $skipMD4 is true, the rest of the file is read into
# csumDigest first, and csumDigest->digest is returned.
410 my($dg, $skipMD4) = @_;
413 if ( $dg->{cached} ) {
415 if ( $dg->{needMD4} ) {
416 if ( $dg->{protocol_version} <= 26 ) {
417 return $dg->{md4DigestOld};
419 return $dg->{md4Digest};
424 # make sure we read the entire file for the file MD4 digest
426 if ( $dg->{needMD4} && !$skipMD4 ) {
428 while ( $dg->{fh}->read(\$fileData, 65536) > 0 ) {
429 $dg->{csumDigest}->add($fileData);
433 return $dg->{csumDigest}->digest if ( $dg->{needMD4} );
# In list context returns ($dg->{cached}, $dg->{cachedInvalid}); in scalar
# context returns only $dg->{cached}.
441 return wantarray ? ($dg->{cached}, $dg->{cachedInvalid}) : $dg->{cached};
# Return the block size currently stored in the digest state.
448 return $dg->{blockSize};
452 # Default log handler: writes the message, followed by a newline, to STDERR.
458 print(STDERR $str, "\n");
462 # Set log handler to a new subroutine.