1 #============================================================= -*-perl-*-
3 # BackupPC::Xfer::RsyncDigest package
7 # This library defines a BackupPC::Xfer::RsyncDigest class for computing
8 # and caching rsync checksums.
11 # Craig Barratt <cbarratt@users.sourceforge.net>
14 # Copyright (C) 2001-2003 Craig Barratt
16 # This program is free software; you can redistribute it and/or modify
17 # it under the terms of the GNU General Public License as published by
18 # the Free Software Foundation; either version 2 of the License, or
19 # (at your option) any later version.
21 # This program is distributed in the hope that it will be useful,
22 # but WITHOUT ANY WARRANTY; without even the implied warranty of
23 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 # GNU General Public License for more details.
26 # You should have received a copy of the GNU General Public License
27 # along with this program; if not, write to the Free Software
28 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 #========================================================================
32 # Version 2.1.0_CVS, released 3 Jul 2003.
34 # See http://backuppc.sourceforge.net.
36 #========================================================================
38 package BackupPC::Xfer::RsyncDigest;
42 use vars qw( $RsyncLibOK );
46 eval "use File::RsyncP;";
49 # File::RsyncP doesn't exist. Define some dummy constant
50 # subs so that the code below doesn't barf.
59 # Return the rsync block size based on the file size.
60 # We also make sure the block size plus 4 (ie: checksumSeed)
61 # is not a multiple of 64 - otherwise the cached checksums
62 # will not be the same for protocol versions <= 26 and > 26.
# Arguments: the class name, the file size, and the default block size
# (which also acts as the lower bound below).
66 my($class, $fileSize, $defaultBlkSize) = @_;
# Start from 1/10000th of the file size, raise it to at least
# $defaultBlkSize, then cap it at 16384.  The cap is applied last, so it
# wins if $defaultBlkSize is itself larger than 16384.
68 my $blkSize = int($fileSize / 10000);
69 $blkSize = $defaultBlkSize if ( $blkSize < $defaultBlkSize );
70 $blkSize = 16384 if ( $blkSize > 16384 );
# If (block size + 4) would be a multiple of 64, bump the block size by 4
# so that it no longer is.
71 $blkSize += 4 if ( (($blkSize + 4) % 64) == 0 );
76 # Compute and add rsync block and file digests to the given file.
# Arguments: the class name, the path of the file to update, the rsync
# block size, and the rsync checksum seed.
# Visible error returns: -1 (FileZIO open failed), -2 (read/write re-open
# failed), -3 (could not read the first byte), -5/-6 (seek/write of the
# appended data failed), -7/-8 (seek/write of the first byte failed).
80 my($class, $file, $blockSize, $checksumSeed) = @_;
# A zero block size is reported on stdout.  NOTE(review): the rest of this
# branch is not visible here; confirm how the zero value is handled before
# the division below.
81 if ( $blockSize == 0 ) {
82 print("bogus digestAdd($file, $blockSize, $checksumSeed)\n");
# Number of blocks fetched per read: enough blocks to cover 65536 * 16
# bytes, plus one.
85 my $nBlks = int(65536 * 16 / $blockSize) + 1;
86 my($data, $blockDigest, $fileDigest);
# Without File::RsyncP nothing can be computed; this is a bare return
# (no status code).
88 return if ( !$RsyncLibOK );
# The digest object is primed with the seed, packed as a 32-bit
# little-endian word, but only when the seed is non-zero.
90 my $digest = File::RsyncP::Digest->new;
91 $digest->add(pack("V", $checksumSeed)) if ( $checksumSeed );
# Open the file through BackupPC::FileZIO.  NOTE(review): the third
# argument (1) presumably selects compressed reading, as the same slot is
# passed $compress elsewhere in this file -- confirm.
93 return -1 if ( !defined(my $fh = BackupPC::FileZIO->open($file, 0, 1)) );
# Read $nBlks blocks at a time; an empty read ends the loop (the loop
# header itself is not visible here).  Block digests are requested with a
# length of 16 and appended to $blockDigest.
95 $fh->read(\$data, $nBlks * $blockSize);
96 last if ( $data eq "" );
97 $blockDigest .= $digest->blockDigest($data, $blockSize, 16,
# File-level digest from digest2.  NOTE(review): presumably two 16-byte
# MD4s, matching the "2 file MD4s (32 bytes)" trailer read back elsewhere
# in this file -- confirm.
101 $fileDigest = $digest->digest2;
# Position of the underlying file handle once all data has been read;
# this is the offset at which the checksum data is written below.
102 my $eofPosn = tell($fh->{fh});
# Data to append: a 0xb3 byte, the block digests, the file digest, then
# four 32-bit little-endian metadata words.  The block count is the
# digest length divided by 20 bytes per block, and the last word is the
# magic number 0x5fe3c289.
104 my $rsyncData = $blockDigest . $fileDigest;
105 my $metaData = pack("VVVV", $blockSize,
107 length($blockDigest) / 20,
108 0x5fe3c289, # magic number
110 my $data2 = chr(0xb3) . $rsyncData . $metaData;
111 # printf("appending %d+%d bytes to %s at offset %d\n",
112 # length($rsyncData),
# Re-open the raw file for update and check its first byte: only 0x78 or
# 0xd6 is accepted, anything else is reported on stdout.
116 open(my $fh2, "+<", $file) || return -2;
118 return -3 if ( sysread($fh2, $data, 1) != 1 );
119 if ( $data ne chr(0x78) && $data ne chr(0xd6) ) {
120 printf("Unexpected first char 0x%x\n", ord($data));
# Write the checksum data at $eofPosn first, and only then rewind and
# overwrite the first byte with 0xd6, the value the reading code in this
# file treats as "has cached checksums".
123 return -5 if ( sysseek($fh2, $eofPosn, 0) != $eofPosn );
124 return -6 if ( syswrite($fh2, $data2) != length($data2) );
125 return -7 if ( !defined(sysseek($fh2, 0, 0)) );
126 return -8 if ( syswrite($fh2, chr(0xd6)) != 1 );
131 # Return rsync checksums for the given file. We read the cached checksums
132 # if they exist and the block size and checksum seed match. Otherwise
133 # we compute the checksums from the file contents.
# Arguments: class name, file name, file size, requested block size
# (0 means "no preference", see the cache check below), default block
# size, checksum seed, whether the file MD4 is needed, whether the file is
# compressed, and whether checksums may be cached in the file.
# On success a three-element list (undef, $fio, block size) is returned;
# the visible failure paths return a single negative number instead.
137 my($class, $fileName, $fileSize, $blockSize, $defBlkSize,
138 $checksumSeed, $needMD4, $compress, $doCache) = @_;
140 return -1 if ( !$RsyncLibOK );
# NOTE(review): only this one entry of the $fio state hash is visible
# here; the remaining initial fields are not shown.
147 digest => File::RsyncP::Digest->new,
# Only non-empty compressed files are probed for cached checksums; the
# probe starts by reading the first byte of the raw file.
150 if ( $fileSize > 0 && $compress ) {
151 open(my $fh, "<", $fileName) || return -2;
153 return -3 if ( read($fh, $data, 1) != 1 );
# A first byte of 0x78 together with $doCache and the fixed seed 32761
# triggers adding cached checksums to the file.
154 if ( $data eq chr(0x78) && $doCache && $checksumSeed == 32761 ) {
156 # 32761 is the magic number that rsync uses for checksumSeed
157 # with the --fixed-csum option.
159 # We now add the cached checksum data to the file. There
160 # is a possible race condition here since two BackupPC_dump
161 # processes might call this function at the same time
162 # on the same file. But this should be ok since both
163 # processes will write the same data, and the order
164 # in which they write it doesn't matter.
# NOTE(review): the argument list is only partly visible; the block size
# passed appears to fall back to blockSize($fileSize, ...) via "||" --
# confirm.  The return value of digestAdd is not checked on this line.
167 $fio->digestAdd($fileName,
169 || BackupPC::Xfer::RsyncDigest->blockSize($fileSize,
173 # now re-open the file and re-read the first byte
175 open($fh, "<", $fileName) || return -2;
177 return -3 if ( read($fh, $data, 1) != 1 );
179 if ( $data eq chr(0xd6) ) {
181 # Looks like this file has cached checksums
182 # Read the last 48 bytes: that's 2 file MD4s (32 bytes)
183 # plus 4 words of meta data
185 return -4 if ( !defined(seek($fh, -48, 2)) );
186 return -5 if ( read($fh, $data, 48) != 48 );
# Split the trailer into two 16-byte digests and four 32-bit
# little-endian words (only some of the destination fields are visible).
187 ($fio->{md4DigestOld},
190 $fio->{checksumSeed},
192 $fio->{magic}) = unpack("a16 a16 V V V V", $data);
# The cached data is used only when the magic number and the seed match,
# and the caller either has no block size preference (0) or asks for the
# same block size that was stored.
193 if ( $fio->{magic} == 0x5fe3c289
194 && $fio->{checksumSeed} == $checksumSeed
195 && ($blockSize == 0 || $fio->{blockSize} == $blockSize) ) {
202 # position the file at the start of the rsync block checksums
203 # (4 (adler) + 16 (md4) bytes each)
205 return -6 if ( !defined(seek($fh, -$fio->{nBlocks}*20 - 48, 2)) );
208 if ( !$fio->{cached} ) {
210 # This file doesn't have cached checksums, or the checksumSeed
211 # or blocksize doesn't match. Open the file and prepare to
212 # compute the checksums.
215 = BackupPC::Xfer::RsyncDigest->blockSize($fileSize, $defBlkSize)
216 if ( $blockSize == 0 );
# Record the parameters actually in use and open the file through
# BackupPC::FileZIO, passing $compress as the third argument.
217 $fio->{checksumSeed} = $checksumSeed;
218 $fio->{blockSize} = $blockSize;
219 $fio->{fh} = BackupPC::FileZIO->open($fileName, 0, $compress);
220 return -7 if ( !defined($fio->{fh}) );
# Separate digest object for the whole-file MD4, primed with the seed.
# NOTE(review): the seed is added unconditionally here, whereas the
# caching code earlier in this file adds it only when non-zero -- confirm
# that the difference is intended.
222 $fio->{csumDigest} = File::RsyncP::Digest->new;
223 $fio->{csumDigest}->add(pack("V", $fio->{checksumSeed}));
226 return (undef, $fio, $fio->{blockSize});
# Arguments: the $fio state object, the number of block digests wanted,
# and the per-block checksum length passed through to the digest calls.
231 my($fio, $num, $csumLen) = @_;
233 my $blockSize = $fio->{blockSize};
# Cached case: the digests are read straight from the file, 20 bytes per
# block.  NOTE(review): the initialisation of $thisNum is not visible
# here; presumably it starts out equal to $num -- confirm.
235 if ( $fio->{cached} ) {
# Never read more blocks than remain, and track how many are left.
237 $thisNum = $fio->{nBlocks} if ( $thisNum > $fio->{nBlocks} );
238 read($fio->{fh}, $fileData, 20 * $thisNum);
239 $fio->{nBlocks} -= $thisNum;
240 if ( $thisNum < $num ) {
242 # unexpected shortfall of data; pad with zero digest
244 $fileData .= pack("c", 0) x (20 * ($num - $thisNum));
# Convert the stored 20-byte entries to the requested checksum length.
246 return $fio->{digest}->blockDigestExtract($fileData, $csumLen);
# Non-cached case: read $num blocks of file data.  A read result <= 0
# replaces the data with a zero-filled buffer of the full requested size.
248 if ( $fio->{fh}->read(\$fileData, $blockSize * $num) <= 0 ) {
250 # unexpected shortfall of data; pad with zeros
252 $fileData = pack("c", 0) x ($blockSize * $num);
# Feed the data into the whole-file digest only when the file MD4 is
# needed, then compute the block digests for this chunk.
254 $fio->{csumDigest}->add($fileData) if ( $fio->{needMD4} );
255 return $fio->{digest}->blockDigest($fileData, $blockSize,
256 $csumLen, $fio->{checksumSeed});
# Cached case: when the file MD4 is needed, return the stored
# md4DigestOld value; no file data is read on this line.
265 if ( $fio->{cached} ) {
267 return $fio->{md4DigestOld} if ( $fio->{needMD4} );
270 # make sure we read the entire file for the file MD4 digest
# Non-cached case: read whatever is left of the file in 65536-byte
# requests and add it to the running whole-file digest.
272 if ( $fio->{needMD4} ) {
274 while ( $fio->{fh}->read(\$fileData, 65536) > 0 ) {
275 $fio->{csumDigest}->add($fileData);
# The computed digest is returned only when the file MD4 was requested.
279 return $fio->{csumDigest}->digest if ( $fio->{needMD4} );