X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=bin%2FBackupPC_tarIncCreate;h=e27d9c97f9ea01e01269cc56abcd46175cc44f4f;hb=4f2737f1abb51bf25efa099622a73a02a345637f;hp=408cf5914d38fee674cf1010afc871a9a55df82f;hpb=69df0d40c6c92e4d04b757c3c7437a36873391b8;p=BackupPC.git

diff --git a/bin/BackupPC_tarIncCreate b/bin/BackupPC_tarIncCreate
index 408cf59..e27d9c9 100755
--- a/bin/BackupPC_tarIncCreate
+++ b/bin/BackupPC_tarIncCreate
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
 #============================================================= -*-perl-*-
 #
 # BackupPC_tarIncCreate: create a tar archive of an existing incremental dump
@@ -6,7 +6,7 @@
 #
 # DESCRIPTION
 #
-#   Usage: BackupPC_tarIncCreate [options] files/directories...
+#   Usage: BackupPC_tarIncCreate [options]
 #
 #   Flags:
 #     Required options:
@@ -31,6 +31,8 @@
 #
 # AUTHOR
 #   Craig Barratt
+#   Ivan Klaric
+#   Dobrica Pavlinusic
 #
 # COPYRIGHT
 #   Copyright (C) 2001-2003  Craig Barratt
@@ -68,19 +70,25 @@ use BackupPC::Attrib qw(:all);
 use BackupPC::FileZIO;
 use BackupPC::View;
 use BackupPC::SearchLib;
-use Data::Dumper;
+use Time::HiRes qw/time/;
+use POSIX qw/strftime/;
+use File::Which;
+use File::Path;
+use File::Slurp;
+use Data::Dumper;	### FIXME
 
 die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
 my $TopDir = $bpc->TopDir();
 my $BinDir = $bpc->BinDir();
 my %Conf   = $bpc->Conf();
-my @DBCache;
-my $db_done = 0;
+%BackupPC::SearchLib::Conf = %Conf;
 my %opts;
+my $in_backup_increment;
+
 
-if ( !getopts("th:n:p:r:s:b:w:", \%opts) || @ARGV < 1 ) {
+if ( !getopts("th:n:p:r:s:b:w:vdf", \%opts) ) {
     print STDERR <<EOF;
usage: $0 [options]
EOF
     exit(1);
 }
 
+my $bin;
+foreach my $c (qw/gzip md5sum tee/) {
+	$bin->{$c} = which($c) || die "$0 needs $c, install it\n";
+}
+
 my @Backups = $bpc->BackupInfoRead($Host);
 my $FileCnt    = 0;
 my $ByteCnt    = 0;
 my $DirCnt     = 0;
 my $SpecialCnt = 0;
 my $ErrorCnt   = 0;
+my $current_tar_size = 0;
+my $total_increment_size = 0;
 
 my $i;
 $Num = $Backups[@Backups + $Num]{num}
 	if ( -@Backups <= $Num && $Num < 0 );
@@ -123,19 +139,42 @@ for ( $i = 0 ; $i < @Backups ; $i++ ) {
     last if ( $Backups[$i]{num} == $Num );
 }
 if ( $i >= @Backups ) {
-    print(STDERR "$0: bad backup number $Num for host $Host\n");
-    exit(1);
+    die "$0: bad backup number $Num for host $Host\n";
 }
 my $PathRemove = $1 if ( $opts{r} =~ /(.+)/ );
 my $PathAdd    = $1 if ( $opts{p} =~ /(.+)/ );
 if ( $opts{s} !~ /^([\w\s\.\/\$-]+)$/ && $opts{s} ne "*" ) {
-    print(STDERR "$0: bad share name '$opts{s}'\n");
-    exit(1);
+    die "$0: bad share name '$opts{s}'\n";
 }
 our $ShareName = $opts{s};
 our $view = BackupPC::View->new($bpc, $Host, \@Backups);
 
+# database
+
+my $dsn = $Conf{SearchDSN};
+my $db_user = $Conf{SearchUser} || '';
+
+my $dbh = DBI->connect($dsn, $db_user, "", { RaiseError => 1, AutoCommit => 0} );
+
+my $sth_inc_size = $dbh->prepare(qq{
+	update backups set
+		inc_size = ?,
+		parts = ?,
+		inc_deleted = false
+	where id = ?
+});
+my $sth_backup_parts = $dbh->prepare(qq{
+	insert into backup_parts (
+		backup_id,
+		part_nr,
+		tar_size,
+		size,
+		md5,
+		items
+	) values (?,?,?,?,?,?)
+});
+
 #
 # This constant and the line of code below that uses it are borrowed
 # from Archive::Tar.  Thanks to Calle Dybedahl and Stephen Zander.
@@ -158,34 +197,186 @@ my(%HardLinkExtraFiles, @HardLinks);
 #
 # Write out all the requested files/directories
 #
-binmode(STDOUT);
-my $fh = *STDOUT;
-if ( $ShareName eq "*" ) {
-    my $PathRemoveOrig = $PathRemove;
-    my $PathAddOrig    = $PathAdd;
-    foreach $ShareName ( $view->shareList($Num) ) {
-        #print(STDERR "Doing share ($ShareName)\n");
-        $PathRemove = "/" if ( !defined($PathRemoveOrig) );
-        ($PathAdd = "/$ShareName/$PathAddOrig") =~ s{//+}{/}g;
-        foreach my $dir ( @ARGV ) {
-            archiveWrite($fh, $dir);
-        }
-        archiveWriteHardLinks($fh);
-    }
-} else {
-    foreach my $dir ( @ARGV ) {
-        archiveWrite($fh, $dir);
-    }
-    archiveWriteHardLinks($fh);
+
+my $max_file_size = $Conf{'MaxArchiveFileSize'} || die "problem with MaxArchiveFileSize parameter";
+
+my $tar_dir = $Conf{InstallDir}.'/'.$Conf{GzipTempDir};
+die "problem with $tar_dir, check GzipTempDir in configuration\n" unless (-d $tar_dir && -w $tar_dir);
+
+my $tar_file = BackupPC::SearchLib::getGzipName($Host, $ShareName, $Num) || die "can't getGzipName($Host, $ShareName, $Num)";
+
+my $tar_path_final = $tar_dir . '/' . $tar_file;
+my $tar_path = $tar_path_final . '.tmp';
+
+$tar_path =~ s#//#/#g;
+
+my $sth = $dbh->prepare(qq{
+	SELECT
+		backups.id
+	FROM backups
+		JOIN shares on shares.id = shareid
+		JOIN hosts on hosts.id = shares.hostid
+	WHERE hosts.name = ? and shares.name = ? and backups.num = ?
+});
+$sth->execute($Host, $ShareName, $Num);
+my ($backup_id) = $sth->fetchrow_array;
+$sth->finish;
+
+
+# delete existing backup_parts
+my $sth_delete_backup_parts = $dbh->prepare(qq{
+	delete from backup_parts
+	where backup_id = ?
+});
+$sth_delete_backup_parts->execute($backup_id);
+
+
+print STDERR "backup_id: $backup_id working dir: $tar_dir, max uncompressed size $max_file_size bytes, tar $tar_file\n" if ($opts{d});
+
+if (-e $tar_path_final) {
+	if ($opts{f}) {
+		rmtree($tar_path_final) || die "can't remove $tar_path_final: $!";
+	} else {
+		die "$tar_path_final already exists\n";
+	}
 }
-
-#
-# Finish with two null 512 byte headers, and then round out a full
-# block.
-#
-my $data = "\0" x ($tar_header_length * 2);
-TarWrite($fh, \$data);
-TarWrite($fh, undef);
+my $fh;
+my $part = 0;
+my $no_files = 0;
+my $items_in_part = 0;
+
+sub new_tar_part {
+	my $arg = {@_};
+
+	if ($fh) {
+		return if ($current_tar_size == 0);
+
+		print STDERR "\n\t+ $part:";
+
+		#
+		# Finish with two null 512 byte headers,
+		# and then round out a full block.
+		#
+		my $data = "\0" x ($tar_header_length * 2);
+		TarWrite($fh, \$data);
+		TarWrite($fh, undef);
+
+		close($fh) || die "can't close archive part $part: $!";
+
+		my $file = $tar_path . '/' . $part;
+
+		my $md5 = read_file( $file . '.md5' ) || die "can't read md5sum file ${file}.md5";
+		$md5 =~ s/\s.*$//;
+
+		my $size = (stat( $file . '.tar.gz' ))[7] || die "can't stat ${file}.tar.gz";
+
+		print "$file, $size bytes, $items_in_part items";
+
+		$sth_backup_parts->execute(
+			$backup_id,
+			$part,
+			$current_tar_size,
+			$size,
+			$md5,
+			$items_in_part,
+		);
+
+		$total_increment_size += $size;
+
+		if ($arg->{close}) {
+
+			sub move($$) {
+				my ($from,$to) = @_;
+				print STDERR "# rename $from -> $to\n" if ($opts{d});
+				rename($from, $to) || die "can't move $from -> $to: $!\n";
+			}
+
+			if ($part == 1) {
+				print STDERR " single" if ($opts{v});
+				move("${tar_path}/1.tar.gz", "${tar_path_final}.tar.gz");
+				move("${tar_path}/1.md5", "${tar_path_final}.md5");
+				rmtree $tar_path or die "can't remove temporary dir $tar_path: $!";
+			} else {
+				print STDERR " [last]" if ($opts{v});
+				move("${tar_path}", "${tar_path_final}");
+
+				# if this archive was a single part before, remove the leftover files
+				foreach my $suffix (qw/.tar.gz .md5/) {
+					my $path = $tar_path_final . $suffix;
+					unlink $path if (-e $path);
+				}
+			}
+
+			$sth_inc_size->execute(
+				$total_increment_size,
+				$part,
+				$backup_id
+			);
+
+			print "\n\ttotal $total_increment_size bytes";
+
+			return;
+		}
+
+	}
+
+	$part++;
+
+	# if this is the first part, create the directory
+
+	if ($part == 1) {
+		if (-e $tar_path) {
+			print STDERR "# deleting existing $tar_path\n" if ($opts{d});
+			rmtree($tar_path);
+		}
+		mkdir($tar_path) || die "can't create directory $tar_path: $!";
+
+		sub abort_cleanup {
+			print STDERR "ABORTED: cleanup temp dir ";
+			rmtree($tar_path);
+			$dbh->rollback;
+			exit 1;
+		}
+
+		$SIG{'INT'} = \&abort_cleanup;
+		$SIG{'QUIT'} = \&abort_cleanup;
+		$SIG{'__DIE__'} = \&abort_cleanup;
+
+	}
+
+	my $file = $tar_path . '/' . $part;
+
+	#
+	# create a complex pipe which will pass output through gzip
+	# for compression, create the file on disk using tee
+	# and pipe the same output to md5sum to create a checksum
+	#
+
+	my $cmd = '| ' . $bin->{'gzip'} . ' ' . $Conf{GzipLevel} . ' ' .
+		'| ' . $bin->{'tee'} . ' ' . $file . '.tar.gz' . ' ' .
+		'| ' . $bin->{'md5sum'} . ' - > ' . $file . '.md5';
+
+	print STDERR "## $cmd\n" if ($opts{d});
+
+	open($fh, $cmd) or die "can't open $cmd: $!";
+	binmode($fh);
+
+	$current_tar_size = 0;
+	$items_in_part = 0;
+}
+
+new_tar_part();
+
+if (seedCache($Host, $ShareName, $Num)) {
+	archiveWrite($fh, '/');
+	archiveWriteHardLinks($fh);
+	new_tar_part( close => 1 );
+} else {
+	print STDERR "NOTE: no files found for $Host:$ShareName, increment $Num\n" if ($opts{v});
+	# remove temporary files if there are no files
+	rmtree($tar_path);
+}
 
 #
 # print out totals if requested
@@ -199,9 +390,16 @@ if ( $ErrorCnt && !$FileCnt && !$DirCnt ) {
     #
     # Got errors, with no files or directories; exit with non-zero
     # status
     #
-    exit(1);
+    die "got errors or no files\n";
 }
-exit(0);
+
+$sth_inc_size->finish;
+$sth_backup_parts->finish;
+
+$dbh->commit || die "can't commit changes to database";
+$dbh->disconnect();
+
+exit;
 
 ###########################################################################
 # Subroutines
@@ -266,6 +464,7 @@ sub TarWrite
 {
     my($fh, $dataRef) = @_;
 
+
     if ( !defined($dataRef) ) {
         #
         # do flush by padding to a full $WriteBufSz
@@ -273,6 +472,10 @@ sub TarWrite
         my $data = "\0" x ($WriteBufSz - length($WriteBuf));
         $dataRef = \$data;
     }
+
+    # poor man's tell :-)
+    $current_tar_size += length($$dataRef);
+
     if ( length($WriteBuf) + length($$dataRef) < $WriteBufSz ) {
         #
        # just buffer and return
@@ -283,14 +486,12 @@
     my $done = $WriteBufSz - length($WriteBuf);
     if ( syswrite($fh, $WriteBuf . 
substr($$dataRef, 0, $done)) != $WriteBufSz ) { - print(STDERR "Unable to write to output file ($!)\n"); - exit(1); + die "Unable to write to output file ($!)\n"; } while ( $done + $WriteBufSz <= length($$dataRef) ) { if ( syswrite($fh, substr($$dataRef, $done, $WriteBufSz)) != $WriteBufSz ) { - print(STDERR "Unable to write to output file ($!)\n"); - exit(1); + die "Unable to write to output file ($!)\n"; } $done += $WriteBufSz; } @@ -390,80 +591,50 @@ sub TarWriteFileInfo } # -# returns 1 if a given directory has files somewhere under it -# in a given dump of a given share +# seed cache of files in this increment # -sub checkSubDirs($$$$) { - my ($dir, $share, $host, $dumpNo) = @_; - my $ret; - my $dsn = $Conf{SearchDSN}; - my $db_user = $Conf{SearchUser} || ''; - my $search_sql; - - print(STDERR $dir); - # erase first dot - if (substr($dir, 0, 1) == '.') - { - $dir = substr($dir, 1, length($dir)); - } - # erase first slash - if (substr($dir, 0, 1) == '/') - { - $dir = substr($dir, 1, length($dir)); - } - # erase last slash - if (substr($dir, length($dir)-1, 1) == '/') - { - $dir = substr($dir, 0, length($dir)-1); +sub seedCache($$$) { + my ($host, $share, $dumpNo) = @_; + + print STDERR curr_time(), "$host:$share #$dumpNo" if ($opts{v}); + my $sql = q{ + SELECT path,size + FROM files + JOIN shares on shares.id = shareid + JOIN hosts on hosts.id = shares.hostid + WHERE hosts.name = ? and shares.name = ? and backupnum = ? + }; + + my $sth = $dbh->prepare($sql); + $sth->execute($host, $share, $dumpNo); + my $count = $sth->rows; + print STDERR " $count items, parts:" if ($opts{v}); + while (my $row = $sth->fetchrow_arrayref) { +#print STDERR "+ ", $row->[0],"\n"; + $in_backup_increment->{ $row->[0] } = $row->[1]; } + + $sth->finish(); - if (! $db_done) - { - print STDERR "doing db..."; - my $search_sql = q{ - SELECT hosts.name, shares.name, startfiles.name, COUNT(files.*) AS subfiles - FROM files startfiles - INNER JOIN shares ON (shares.id=startfiles.shareid) - INNER JOIN hosts ON (hosts.id=shares.hostid) - INNER JOIN backups ON ( - backups.num=startfiles.backupnum AND - backups.hostid=hosts.id AND backups.shareid=shares.id - ) - LEFT JOIN files ON ( - files.backupnum=startfiles.backupnum AND - files.shareid=startfiles.shareid AND - files.path LIKE startfiles.path || '/%' AND - files.type<>startfiles.type AND - files.id <> startfiles.id - ) - WHERE - hosts.name=? AND - shares.name=? AND - startfiles.type=? AND - startfiles.backupnum=? 
-		GROUP BY hosts.name, shares.name, startfiles.name, startfiles.backupnum;
-	};
-	my $dbh = DBI->connect($dsn, $db_user, "", { RaiseError => 1, AutoCommit => 1} );
-	my $sth = $dbh->prepare($search_sql);
-	$sth->execute($host, $share, BPC_FTYPE_DIR, $dumpNo);
-	print STDERR "done\n";
-	while (my @r_data = $sth->fetchrow_array())
-	{
-		$DBCache[$r_data[0]][$r_data[1]][$r_data[2]] = 1;
-	}
-
-	$sth->finish();
+	return $count;
+}
 
-	$DBCache[$host][$share][$dir] = $ret;
-	$dbh->disconnect();
-	$db_done = 1;
-	}
+#
+# calculate overhead for one file in tar
+#
+sub tar_overhead($) {
+	my $name = shift || '';
+
+	# header, padding of file and two null blocks at end
+	my $len = 4 * $tar_header_length;
 
-	if ($DBCache[$host][$share][$dir] != undef && $DBCache[$host][$share][$dir] == 1)
-	{
-		return 1;
+	# if filename is longer than 99 chars add blocks for the
+	# long filename
+	if ( length($name) > 99 ) {
+		$len += int( ( length($name) + $tar_header_length ) / $tar_header_length ) * $tar_header_length;
 	}
-	return 0;
+
+	return $len;
 }
 
 my $Attr;
@@ -473,11 +644,25 @@ sub TarWriteFile
 {
     my($hdr, $fh, $tarPathOverride) = @_;
 
-
     my $tarPath = $hdr->{relPath};
     $tarPath = $tarPathOverride if ( defined($tarPathOverride) );
 
     $tarPath =~ s{//+}{/}g;
+
+	#print STDERR "? $tarPath\n" if ($opts{d});
+	my $size = $in_backup_increment->{$tarPath};
+	return unless (defined($size));
+
+	# is this file too large to fit into MaxArchiveFileSize?
+
+	if ( ($current_tar_size + tar_overhead($tarPath) + $size) > $max_file_size ) {
+		print STDERR "# tar file $current_tar_size + $tar_header_length + $size > $max_file_size, splitting\n" if ($opts{d});
+		new_tar_part();
+	}
+
+	#print STDERR "A $tarPath [$size] tell: $current_tar_size\n" if ($opts{d});
+	$items_in_part++;
+
     if ( defined($PathRemove)
             && substr($tarPath, 0, length($PathRemove)) eq $PathRemove ) {
         substr($tarPath, 0, length($PathRemove)) = $PathAdd;
@@ -490,15 +675,9 @@ sub TarWriteFile
         #
         # Directory: just write the header
         #
-
-        $hdr->{name} .= "/" if ( $hdr->{name} !~ m{/$} );
-	# check if it has files under it in the database
-	if ( checkSubDirs($hdr->{path}, $ShareName, $Host, $Num) != 0 )
-	{
-        	TarWriteFileInfo($fh, $hdr);
-	        $DirCnt++;
-	}
+        TarWriteFileInfo($fh, $hdr);
+        $DirCnt++;
     } elsif ( $hdr->{type} == BPC_FTYPE_FILE ) {
         #
        # Regular file: write the header and file
@@ -509,24 +688,68 @@ sub TarWriteFile
             $ErrorCnt++;
             return;
         }
-        TarWriteFileInfo($fh, $hdr);
-        my($data, $size);
-        while ( $f->read(\$data, $BufSize) > 0 ) {
-            TarWrite($fh, \$data);
-            $size += length($data);
-        }
-        $f->close;
-        TarWritePad($fh, $size);
+		# do we need to split the file?
+		if ($hdr->{size} < $max_file_size) {
+	        TarWriteFileInfo($fh, $hdr);
+	        my($data, $size);
+	        while ( $f->read(\$data, $BufSize) > 0 ) {
+	            TarWrite($fh, \$data);
+	            $size += length($data);
+	        }
+	        $f->close;
+	        TarWritePad($fh, $size);
 			$FileCnt++;
 			$ByteCnt += $size;
+		} else {
+			my $full_size = $hdr->{size};
+			my $orig_name = $hdr->{name};
+			my $max_part_size = $max_file_size - tar_overhead($hdr->{name});
+
+			my $parts = int(($full_size + $max_part_size - 1) / $max_part_size);
+			print STDERR "# splitting $orig_name [$full_size bytes] into $parts parts\n" if ($opts{d});
+			foreach my $subpart ( 1 .. $parts ) {
+				new_tar_part();
+				if ($subpart < $parts) {
+					$hdr->{size} = $max_part_size;
+				} else {
+					$hdr->{size} = $full_size % $max_part_size;
+				}
+				$hdr->{name} = $orig_name . '/' . 
$subpart; + print STDERR "## creating part $subpart ",$hdr->{name}, " [", $hdr->{size}," bytes]\n"; + + TarWriteFileInfo($fh, $hdr); + my($data, $size); +if (0) { + for ( 1 .. int($hdr->{size} / $BufSize) ) { + my $r_size = $f->read(\$data, $BufSize); + die "expected $BufSize bytes read, got $r_size bytes!" if ($r_size != $BufSize); + TarWrite($fh, \$data); + $size += length($data); + } +} + my $size_left = $hdr->{size} % $BufSize; + my $r_size = $f->read(\$data, $size_left); + die "expected $size_left bytes last read, got $r_size bytes!" if ($r_size != $size_left); + + TarWrite($fh, \$data); + $size += length($data); + TarWritePad($fh, $size); + + $items_in_part++; + } + $f->close; + $FileCnt++; + $ByteCnt += $full_size; + new_tar_part(); + } } elsif ( $hdr->{type} == BPC_FTYPE_HARDLINK ) { # # Hardlink file: either write a hardlink or the complete file - # depending upon whether the linked-to file will be written - # to the archive. + # depending upon whether the linked-to file will be written + # to the archive. # - # Start by reading the contents of the link. - # + # Start by reading the contents of the link. + # my $f = BackupPC::FileZIO->open($hdr->{fullPath}, 0, $hdr->{compress}); if ( !defined($f) ) { print(STDERR "Unable to open file $hdr->{fullPath}\n"); @@ -537,39 +760,26 @@ sub TarWriteFile while ( $f->read(\$data, $BufSize) > 0 ) { $hdr->{linkname} .= $data; } - $f->close; - # - # Check @ARGV and the list of hardlinked files we have explicity - # dumped to see if we have dumped this file or not - # - my $done = 0; - my $name = $hdr->{linkname}; - $name =~ s{^\./}{/}; - if ( $HardLinkExtraFiles{$name} ) { - $done = 1; - } else { - foreach my $arg ( @ARGV ) { - $arg =~ s{^\./+}{/}; - $arg =~ s{/+$}{}; - $done = 1 if ( $name eq $arg || $name =~ /^\Q$arg\// ); - } - } - if ( $done ) { - # - # Target file will be or was written, so just remember - # the hardlink so we can dump it later. - # - push(@HardLinks, $hdr); - $SpecialCnt++; - } else { - # - # Have to dump the original file. Just call the top-level - # routine, so that we save the hassle of dealing with - # mangling, merging and attributes. - # - $HardLinkExtraFiles{$hdr->{linkname}} = 1; - archiveWrite($fh, $hdr->{linkname}, $hdr->{name}); - } + $f->close; + my $done = 0; + my $name = $hdr->{linkname}; + $name =~ s{^\./}{/}; + if ( $HardLinkExtraFiles{$name} ) { + # + # Target file will be or was written, so just remember + # the hardlink so we can dump it later. + # + push(@HardLinks, $hdr); + $SpecialCnt++; + } else { + # + # Have to dump the original file. Just call the top-level + # routine, so that we save the hassle of dealing with + # mangling, merging and attributes. + # + $HardLinkExtraFiles{$hdr->{linkname}} = 1; + archiveWrite($fh, $hdr->{linkname}, $hdr->{name}); + } } elsif ( $hdr->{type} == BPC_FTYPE_SYMLINK ) { # # Symbolic link: read the symbolic link contents into the header @@ -621,4 +831,8 @@ sub TarWriteFile $ErrorCnt++; } } - + +my $t_fmt = '%Y-%m-%d %H:%M:%S'; +sub curr_time { + return strftime($t_fmt,localtime()); +}
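
Editor's note on the `new_tar_part()` changes above: the patch writes each archive part through a shell pipeline (gzip for compression, tee to land the `.tar.gz` on disk, md5sum to checksum the same compressed stream) instead of writing the file directly. Below is a minimal standalone sketch of that idea; the output prefix, the compression level and the fake payload are illustrative assumptions, not values taken from BackupPC.

#!/usr/bin/perl -w
# Sketch of the "compress, tee to disk, checksum" pipeline used by new_tar_part().
# All paths and sizes here are hypothetical examples.
use strict;

my $out = '/tmp/part-1';                       # hypothetical output prefix
my $cmd = '| gzip -9 '                         # compress everything written to the pipe
        . '| tee ' . $out . '.tar.gz '         # keep the compressed stream on disk
        . '| md5sum - > ' . $out . '.md5';     # checksum the same compressed stream

open(my $fh, $cmd) or die "can't open pipeline: $!";
binmode($fh);
print $fh "example payload\n" for 1 .. 1000;   # stand-in for the tar data stream
close($fh) or die "pipeline failed: $!";

print "wrote ${out}.tar.gz, checksum in ${out}.md5\n";

Note the design choice this mirrors: md5sum sees the gzip output (via tee), so the recorded checksum covers the compressed `.tar.gz` file as stored, not the uncompressed tar stream.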
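Editor's note on the splitting logic in `TarWriteFile()` above: files larger than MaxArchiveFileSize are cut into numbered sub-parts, each sized to leave room for the tar header overhead estimated by `tar_overhead()`. The sketch below reproduces only that arithmetic with invented numbers (a 10 MB limit and a 25 MB file) so the ceiling division and last-part remainder are easier to follow; it is an illustration, not code from the patch.

#!/usr/bin/perl -w
# Worked example of the part-splitting math from TarWriteFile(), with made-up sizes.
use strict;

my $tar_header_length = 512;
my $max_file_size     = 10 * 1024 * 1024;      # pretend MaxArchiveFileSize

# same overhead estimate as tar_overhead(): header + padding + two null blocks,
# plus extra header blocks when the name exceeds 99 characters
sub tar_overhead {
    my $name = shift || '';
    my $len = 4 * $tar_header_length;
    if ( length($name) > 99 ) {
        $len += int( ( length($name) + $tar_header_length ) / $tar_header_length )
              * $tar_header_length;
    }
    return $len;
}

my $name          = 'some/backup/file.bin';    # hypothetical file
my $full_size     = 25 * 1024 * 1024;          # pretend file size
my $max_part_size = $max_file_size - tar_overhead($name);

# ceiling division, exactly as in the patch
my $parts = int( ($full_size + $max_part_size - 1) / $max_part_size );

foreach my $subpart ( 1 .. $parts ) {
    # last part gets the remainder, matching the $full_size % $max_part_size in the patch
    my $size = $subpart < $parts ? $max_part_size : $full_size % $max_part_size;
    printf "%s/%d -> %d bytes\n", $name, $subpart, $size;
}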