#!/usr/bin/perl -w
#============================================================= -*-perl-*-
#
# BackupPC_tarIncCreate: create a tar archive of an existing incremental dump
#
# DESCRIPTION
#
#   Usage: BackupPC_tarIncCreate [options]
#
#   Flags:
#     Required options:
#
#       -h host         Host from which the tar archive is created.
#       -n dumpNum      Dump number from which the tar archive is created.
#                       A negative number means relative to the end (eg -1
#                       means the most recent dump, -2 2nd most recent etc).
#       -s shareName    Share name from which the tar archive is created.
#
#     Other options:
#       -t              print summary totals
#       -r pathRemove   path prefix that will be replaced with pathAdd
#       -p pathAdd      new path prefix
#       -b BLOCKS       BLOCKS x 512 bytes per record (default 20; same as tar)
#       -w writeBufSz   write buffer size (default 1MB)
#
#   The -h, -n and -s options specify which dump is used to generate
#   the tar archive.  The -r and -p options can be used to relocate
#   the paths in the tar archive so extracted files can be placed
#   in a location different from their original location.
#
# AUTHOR
#   Craig Barratt
#   Ivan Klaric
#   Dobrica Pavlinusic
#
# COPYRIGHT
#   Copyright (C) 2001-2003  Craig Barratt
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#========================================================================
#
# Version 2.1.0, released 20 Jun 2004.
#
# See http://backuppc.sourceforge.net.
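#
# EXAMPLE
#
#   A hypothetical invocation (host and share names are placeholders,
#   not taken from any real configuration):
#
#       BackupPC_tarIncCreate -h myhost -s /home -n -1 -t -v
#
#   This writes the gzipped tar part(s) for the most recent dump of
#   share /home on myhost into GzipTempDir and records them in the
#   backup_parts table.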
#
#========================================================================

use strict;
no  utf8;
use lib "/usr/local/BackupPC/lib";
use File::Path;
use Getopt::Std;
use DBI;
use BackupPC::Lib;
use BackupPC::Attrib qw(:all);
use BackupPC::FileZIO;
use BackupPC::View;
use BackupPC::Search;
use Time::HiRes qw/time/;
use POSIX qw/strftime/;
use File::Which;
use File::Slurp;
use Data::Dumper;       ### FIXME

die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
my %Conf   = $bpc->Conf();
%BackupPC::Search::Conf = %Conf;
my %opts;
my $in_backup_increment;

# The usage text below was lost to extraction damage; it is
# reconstructed from the option list in the header comment and the
# getopts() string.
if ( !getopts("th:n:p:r:s:b:w:vdf", \%opts) ) {
    print STDERR <<EOF;
usage: $0 [options]
  Required options:
     -h host         host from which the tar archive is created
     -n dumpNum      dump number from which the tar archive is created
                     A negative number means relative to the end (eg -1
                     means the most recent dump, -2 2nd most recent etc)
     -s shareName    share name from which the tar archive is created

  Other options:
     -t              print summary totals
     -r pathRemove   path prefix that will be replaced with pathAdd
     -p pathAdd      new path prefix
     -b BLOCKS       BLOCKS x 512 bytes per record (default 20; same as tar)
     -w writeBufSz   write buffer size (default 1MB)
     -v              verbose messages
     -d              debug messages
     -f              overwrite an existing archive
EOF
    exit(1);
}

if ( $opts{h} !~ /^([\w\.\s-]+)$/ ) {
    die "$0: bad host name '$opts{h}'\n";
}
my $Host = $opts{h};

if ( $opts{n} !~ /^(-?\d+)$/ ) {
    die "$0: bad dump number '$opts{n}'\n";
}
my $Num = $opts{n};

my $bin;
foreach my $c (qw/gzip md5sum tee/) {
    $bin->{$c} = which($c) || die "$0 needs $c, install it\n";
}

my @Backups = $bpc->BackupInfoRead($Host);
my $FileCnt = 0;
my $ByteCnt = 0;
my $DirCnt = 0;
my $SpecialCnt = 0;
my $ErrorCnt = 0;
my $current_tar_size = 0;
my $total_increment_size = 0;

my $i;
$Num = $Backups[@Backups + $Num]{num} if ( -@Backups <= $Num && $Num < 0 );
for ( $i = 0 ; $i < @Backups ; $i++ ) {
    last if ( $Backups[$i]{num} == $Num );
}
if ( $i >= @Backups ) {
    die "$0: bad backup number $Num for host $Host\n";
}

my $PathRemove = $1 if ( $opts{r} =~ /(.+)/ );
my $PathAdd    = $1 if ( $opts{p} =~ /(.+)/ );
if ( $opts{s} !~ /^([\w\s\.\/\$-]+)$/ && $opts{s} ne "*" ) {
    die "$0: bad share name '$opts{s}'\n";
}
our $ShareName = $opts{s};
our $view = BackupPC::View->new($bpc, $Host, \@Backups);

# database

my $dsn     = $Conf{SearchDSN};
my $db_user = $Conf{SearchUser} || '';

my $dbh = DBI->connect($dsn, $db_user, "", { RaiseError => 1, AutoCommit => 0 } );

my $sth_inc_size = $dbh->prepare(qq{
    update backups set
        inc_size = ?,
        inc_deleted = false
    where id = ?
});
my $sth_backup_parts = $dbh->prepare(qq{
    insert into backup_parts (
        backup_id,
        part_nr,
        tar_size,
        size,
        md5,
        items
    ) values (?,?,?,?,?,?)
});

#
# This constant and the line of code below that uses it are borrowed
# from Archive::Tar.  Thanks to Calle Dybedahl and Stephen Zander.
# See www.cpan.org.
#
# Archive::Tar is Copyright 1997 Calle Dybedahl. All rights reserved.
# Copyright 1998 Stephen Zander. All rights reserved.
#
my $tar_pack_header
    = 'a100 a8 a8 a8 a12 a12 A8 a1 a100 a6 a2 a32 a32 a8 a8 a155 x12';
my $tar_header_length = 512;

my $BufSize    = $opts{w} || 1048576;   # 1MB or 2^20
my $WriteBuf   = "";
my $WriteBufSz = ($opts{b} || 20) * $tar_header_length;

my(%UidCache, %GidCache);
my(%HardLinkExtraFiles, @HardLinks);

#
# Write out all the requested files/directories
#

my $max_file_size = $Conf{'MaxArchiveFileSize'}
    || die "problem with MaxArchiveFileSize parameter";

my $tar_dir = $Conf{GzipTempDir};
die "problem with $tar_dir, check GzipTempDir in configuration\n"
    unless (-d $tar_dir && -w $tar_dir);

my $tar_file = BackupPC::Search::getGzipName($Host, $ShareName, $Num)
    || die "can't getGzipName($Host, $ShareName, $Num)";

my $tar_path_final = $tar_dir . '/' . $tar_file;
my $tar_path = $tar_path_final . '.tmp';

$tar_path =~ s#//#/#g;

my $sth = $dbh->prepare(qq{
    SELECT
        backups.id
    FROM backups
        JOIN shares on shares.id = shareid
        JOIN hosts on hosts.id = shares.hostid
    WHERE hosts.name = ? and shares.name = ? and backups.num = ?
});
$sth->execute($Host, $ShareName, $Num);
my ($backup_id) = $sth->fetchrow_array;
$sth->finish;

# delete existing backup_parts
my $sth_delete_backup_parts = $dbh->prepare(qq{
    delete from backup_parts
    where backup_id = ?
});
$sth_delete_backup_parts->execute($backup_id);
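#
# On-disk layout produced by new_tar_part() below, where FINAL stands
# for GzipTempDir/<name from getGzipName> (shown informally):
#
#   while running:  FINAL.tmp/1.tar.gz, FINAL.tmp/1.md5, ...
#   single part:    FINAL.tar.gz and FINAL.md5
#   multiple parts: FINAL/1.tar.gz, FINAL/1.md5, FINAL/2.tar.gz, ...
#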
print STDERR "backup_id: $backup_id working dir: $tar_dir, max uncompressed size $max_file_size bytes, tar $tar_file\n"
    if ($opts{d});

if (-e $tar_path_final) {
    if ($opts{f}) {
        rmtree($tar_path_final) || die "can't remove $tar_path_final: $!";
    } else {
        die "$tar_path_final already exists\n";
    }
}

my $fh;
my $part = 0;
my $no_files = 0;
my $items_in_part = 0;

sub new_tar_part {
    my $arg = {@_};

    if ($fh) {
        return if ($current_tar_size == 0);

        print STDERR "\n\t+ $part:";

        #
        # Finish with two null 512 byte headers,
        # and then round out a full block.
        #
        my $data = "\0" x ($tar_header_length * 2);
        TarWrite($fh, \$data);
        TarWrite($fh, undef);

        close($fh) || die "can't close archive part $part: $!";

        my $file = $tar_path . '/' . $part;

        my $md5 = read_file( $file . '.md5' )
            || die "can't read md5sum file ${file}.md5";
        $md5 =~ s/\s.*//s;      # keep just the checksum, drop filename and newline

        my $size = (stat( $file . '.tar.gz' ))[7]
            || die "can't stat ${file}.tar.gz";

        print "$file, $size bytes, $items_in_part items";

        $sth_backup_parts->execute(
            $backup_id,
            $part,
            $current_tar_size,
            $size,
            $md5,
            $items_in_part,
        );

        $total_increment_size += $size;

        if ($arg->{close}) {

            sub move($$) {
                my ($from, $to) = @_;
                print STDERR "# rename $from -> $to\n" if ($opts{d});
                rename($from, $to) || die "can't move $from -> $to: $!\n";
            }

            if ($part == 1) {
                print STDERR " single" if ($opts{v});
                move("${tar_path}/1.tar.gz", "${tar_path_final}.tar.gz");
                move("${tar_path}/1.md5", "${tar_path_final}.md5");
                rmtree($tar_path)
                    or die "can't remove temporary dir $tar_path: $!";
            } else {
                print STDERR " [last]" if ($opts{v});
                move("${tar_path}", "${tar_path_final}");

                # if this archive was previously a single part, remove
                # the leftover single-part files
                foreach my $suffix (qw/.tar.gz .md5/) {
                    my $path = $tar_path_final . $suffix;
                    unlink $path if (-e $path);
                }
            }

            $sth_inc_size->execute(
                $total_increment_size,
                $backup_id
            );

            print "\n\ttotal $total_increment_size bytes";

            return;
        }

    }

    $part++;

    # if this is first part, create directory
    if ($part == 1) {
        if (-e $tar_path) {
            print STDERR "# deleting existing $tar_path\n" if ($opts{d});
            rmtree($tar_path);
        }
        mkdir($tar_path) || die "can't create directory $tar_path: $!";

        sub abort_cleanup {
            print STDERR "ABORTED: cleanup temp dir ";
            rmtree($tar_path);
            $dbh->rollback;
            exit 1;
        }

        $SIG{'INT'}     = \&abort_cleanup;
        $SIG{'QUIT'}    = \&abort_cleanup;
        $SIG{'__DIE__'} = \&abort_cleanup;
    }

    my $file = $tar_path . '/' . $part;

    #
    # create complex pipe which will pass output through gzip
    # for compression, create file on disk using tee
    # and pipe same output to md5sum to create checksum
    #
    my $cmd = '| ' . $bin->{'gzip'}   . ' ' . $Conf{GzipLevel} .  ' ' .
              '| ' . $bin->{'tee'}    . ' ' . $file . '.tar.gz' . ' ' .
              '| ' . $bin->{'md5sum'} . ' - > ' . $file . '.md5';

    print STDERR "## $cmd\n" if ($opts{d});

    open($fh, $cmd) or die "can't open $cmd: $!";
    binmode($fh);

    $current_tar_size = 0;
    $items_in_part = 0;
}

new_tar_part();

if (seedCache($Host, $ShareName, $Num)) {
    archiveWrite($fh, '/');
    archiveWriteHardLinks($fh);
    new_tar_part( close => 1 );
} else {
    print STDERR "NOTE: no files found for $Host:$ShareName, increment $Num\n"
        if ($opts{v});
    # remove temporary files if there are no files
    rmtree($tar_path);
    my $sth = $dbh->prepare(qq{
        update backups set
            inc_size = 0,
            inc_deleted = true
        where id = ?
    });
    $sth->execute($backup_id);
}
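#
# Summary of the main flow above (informational, no extra behaviour):
# seedCache() loads the list of files belonging to this increment from
# the database, new_tar_part() opens a gzip|tee|md5sum pipeline for the
# current part, archiveWrite() streams each matching file through it
# via BackupPC::View, and new_tar_part(close => 1) finalizes the last
# part and records the sizes in backups/backup_parts.
#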
#
# print out totals if requested
#
if ( $opts{t} ) {
    print STDERR "Done: $FileCnt files, $ByteCnt bytes, $DirCnt dirs,",
                 " $SpecialCnt specials, $ErrorCnt errors\n";
}
if ( $ErrorCnt && !$FileCnt && !$DirCnt ) {
    #
    # Got errors, with no files or directories; exit with non-zero
    # status
    #
    die "got errors or no files\n";
}

$sth_inc_size->finish;
$sth_backup_parts->finish;

$dbh->commit || die "can't commit changes to database";
$dbh->disconnect();

exit;

###########################################################################
# Subroutines
###########################################################################

sub archiveWrite
{
    my($fh, $dir, $tarPathOverride) = @_;

    if ( $dir =~ m{(^|/)\.\.(/|$)} ) {
        print(STDERR "$0: bad directory '$dir'\n");
        $ErrorCnt++;
        return;
    }
    $dir = "/" if ( $dir eq "." );
    #print(STDERR "calling find with $Num, $ShareName, $dir\n");

    if ( $view->find($Num, $ShareName, $dir, 0, \&TarWriteFile,
                     $fh, $tarPathOverride) < 0 ) {
        print(STDERR "$0: bad share or directory '$ShareName/$dir'\n");
        $ErrorCnt++;
        return;
    }
}

#
# Write out any hardlinks (if any)
#
sub archiveWriteHardLinks
{
    my($fh) = @_;   # was "my $fh = @_;", which assigned the argument
                    # count instead of the filehandle
    foreach my $hdr ( @HardLinks ) {
        $hdr->{size} = 0;
        if ( defined($PathRemove)
              && substr($hdr->{linkname}, 0, length($PathRemove)+1)
                        eq ".$PathRemove" ) {
            substr($hdr->{linkname}, 0, length($PathRemove)+1) = ".$PathAdd";
        }
        TarWriteFileInfo($fh, $hdr);
    }
    @HardLinks = ();
    %HardLinkExtraFiles = ();
}

sub UidLookup
{
    my($uid) = @_;

    $UidCache{$uid} = (getpwuid($uid))[0] if ( !exists($UidCache{$uid}) );
    return $UidCache{$uid};
}

sub GidLookup
{
    my($gid) = @_;

    $GidCache{$gid} = (getgrgid($gid))[0] if ( !exists($GidCache{$gid}) );
    return $GidCache{$gid};
}

sub TarWrite
{
    my($fh, $dataRef) = @_;

    if ( !defined($dataRef) ) {
        #
        # do flush by padding to a full $WriteBufSz
        #
        my $data = "\0" x ($WriteBufSz - length($WriteBuf));
        $dataRef = \$data;
    }

    # poor man's tell :-)
    $current_tar_size += length($$dataRef);

    if ( length($WriteBuf) + length($$dataRef) < $WriteBufSz ) {
        #
        # just buffer and return
        #
        $WriteBuf .= $$dataRef;
        return;
    }
    my $done = $WriteBufSz - length($WriteBuf);
    if ( syswrite($fh, $WriteBuf . substr($$dataRef, 0, $done))
                            != $WriteBufSz ) {
        die "Unable to write to output file ($!)\n";
    }
    while ( $done + $WriteBufSz <= length($$dataRef) ) {
        if ( syswrite($fh, substr($$dataRef, $done, $WriteBufSz))
                            != $WriteBufSz ) {
            die "Unable to write to output file ($!)\n";
        }
        $done += $WriteBufSz;
    }
    $WriteBuf = substr($$dataRef, $done);
}

sub TarWritePad
{
    my($fh, $size) = @_;

    if ( $size % $tar_header_length ) {
        my $data = "\0" x ($tar_header_length - ($size % $tar_header_length));
        TarWrite($fh, \$data);
    }
}

sub TarWriteHeader
{
    my($fh, $hdr) = @_;

    $hdr->{uname} = UidLookup($hdr->{uid}) if ( !defined($hdr->{uname}) );
    $hdr->{gname} = GidLookup($hdr->{gid}) if ( !defined($hdr->{gname}) );
    my $devmajor = defined($hdr->{devmajor}) ? sprintf("%07o", $hdr->{devmajor})
                                             : "";
    my $devminor = defined($hdr->{devminor}) ? sprintf("%07o", $hdr->{devminor})
                                             : "";
    my $sizeStr;
    if ( $hdr->{size} >= 2 * 65536 * 65536 ) {
        #
        # GNU extension for files >= 8GB: send size in big-endian binary
        #
        $sizeStr = pack("c4 N N", 0x80, 0, 0, 0,
                                  $hdr->{size} / (65536 * 65536),
                                  $hdr->{size} % (65536 * 65536));
    } elsif ( $hdr->{size} >= 1 * 65536 * 65536 ) {
        #
        # sprintf octal only handles up to 2^32 - 1
        #
        $sizeStr = sprintf("%03o", $hdr->{size} / (1 << 24))
                 . sprintf("%08o", $hdr->{size} % (1 << 24));
    } else {
        $sizeStr = sprintf("%011o", $hdr->{size});
    }
    my $data = pack($tar_pack_header,
                     substr($hdr->{name}, 0, 99),
                     sprintf("%07o", $hdr->{mode}),
                     sprintf("%07o", $hdr->{uid}),
                     sprintf("%07o", $hdr->{gid}),
                     $sizeStr,
                     sprintf("%011o", $hdr->{mtime}),
                     "",        #checksum field - space padded by pack("A8")
                     $hdr->{type},
                     substr($hdr->{linkname}, 0, 99),
                     $hdr->{magic} || 'ustar ',
                     $hdr->{version} || ' ',
                     $hdr->{uname},
                     $hdr->{gname},
                     $devmajor,
                     $devminor,
                     ""         # prefix is empty
                );
    substr($data, 148, 7) = sprintf("%06o\0", unpack("%16C*",$data));
    TarWrite($fh, \$data);
}
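#
# Worked examples for the size-field encoding above (sizes chosen for
# illustration only):
#
#   1234 bytes           -> sprintf("%011o") -> "00000002322"
#   5 GiB (= 5368709120) -> "%03o" of size/2^24 gives "500", "%08o" of
#                           size%2^24 gives "00000000" -> "50000000000"
#   >= 8 GiB             -> byte 0x80, three zero bytes, then the size
#                           as two big-endian 32-bit words (GNU tar
#                           base-256 extension)
#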
sub TarWriteFileInfo
{
    my($fh, $hdr) = @_;

    #
    # Handle long link names (symbolic links)
    #
    if ( length($hdr->{linkname}) > 99 ) {
        my %h;
        my $data = $hdr->{linkname} . "\0";
        $h{name} = "././\@LongLink";
        $h{type} = "K";
        $h{size} = length($data);
        TarWriteHeader($fh, \%h);
        TarWrite($fh, \$data);
        TarWritePad($fh, length($data));
    }
    #
    # Handle long file names
    #
    if ( length($hdr->{name}) > 99 ) {
        my %h;
        my $data = $hdr->{name} . "\0";
        $h{name} = "././\@LongLink";
        $h{type} = "L";
        $h{size} = length($data);
        TarWriteHeader($fh, \%h);
        TarWrite($fh, \$data);
        TarWritePad($fh, length($data));
    }
    TarWriteHeader($fh, $hdr);
}

#
# seed cache of files in this increment
#
sub seedCache($$$)
{
    my ($host, $share, $dumpNo) = @_;

    print STDERR curr_time(), "$host:$share #$dumpNo" if ($opts{v});
    my $sql = q{
        SELECT path, size
        FROM files
            JOIN shares on shares.id = shareid
            JOIN hosts on hosts.id = shares.hostid
        WHERE hosts.name = ? and shares.name = ? and backupnum = ?
    };

    my $sth = $dbh->prepare($sql);
    $sth->execute($host, $share, $dumpNo);
    my $count = $sth->rows;
    print STDERR " $count items, parts:" if ($opts{v});
    while (my $row = $sth->fetchrow_arrayref) {
        #print STDERR "+ ", $row->[0],"\n";
        $in_backup_increment->{ $row->[0] } = $row->[1];
    }

    $sth->finish();

    return $count;
}

#
# calculate overhead for one file in tar
#
sub tar_overhead($)
{
    my $name = shift || '';

    # header, padding of file and two null blocks at end
    my $len = 4 * $tar_header_length;

    # if filename is longer than 99 chars, add blocks for the
    # GNU long filename record (the original comment said "subtract",
    # but the code adds)
    if ( length($name) > 99 ) {
        $len += int( ( length($name) + $tar_header_length )
                     / $tar_header_length ) * $tar_header_length;
    }

    return $len;
}
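#
# Worked example for tar_overhead() (name lengths are illustrative):
#
#   name of 20 chars:  4 * 512 = 2048 bytes
#       (header + up to one block of padding + two null end blocks)
#   name of 120 chars: 2048 + int((120 + 512) / 512) * 512
#                    = 2048 + 512 = 2560 bytes
#       (extra @LongLink header plus name data rounded to a block)
#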
my $Attr;
my $AttrDir;

sub TarWriteFile
{
    my($hdr, $fh, $tarPathOverride) = @_;

    my $tarPath = $hdr->{relPath};
    $tarPath = $tarPathOverride if ( defined($tarPathOverride) );

    $tarPath =~ s{//+}{/}g;

    #print STDERR "? $tarPath\n" if ($opts{d});
    my $size = $in_backup_increment->{$tarPath};
    return unless (defined($size));

    # is this file too large to fit into MaxArchiveFileSize?
    if ( ($current_tar_size + tar_overhead($tarPath) + $size) > $max_file_size ) {
        print STDERR "# tar file $current_tar_size + " . tar_overhead($tarPath)
            . " + $size > $max_file_size, splitting\n" if ($opts{d});
        new_tar_part();
    }

    #print STDERR "A $tarPath [$size] tell: $current_tar_size\n" if ($opts{d});
    $items_in_part++;

    if ( defined($PathRemove)
            && substr($tarPath, 0, length($PathRemove)) eq $PathRemove ) {
        substr($tarPath, 0, length($PathRemove)) = $PathAdd;
    }
    $tarPath = "./" . $tarPath if ( $tarPath !~ /^\.\// );
    $tarPath =~ s{//+}{/}g;
    $hdr->{name} = $tarPath;

    if ( $hdr->{type} == BPC_FTYPE_DIR ) {
        #
        # Directory: just write the header
        #
        $hdr->{name} .= "/" if ( $hdr->{name} !~ m{/$} );
        TarWriteFileInfo($fh, $hdr);
        $DirCnt++;
    } elsif ( $hdr->{type} == BPC_FTYPE_FILE ) {
        #
        # Regular file: write the header and file
        #
        my $f = BackupPC::FileZIO->open($hdr->{fullPath}, 0, $hdr->{compress});
        if ( !defined($f) ) {
            print(STDERR "Unable to open file $hdr->{fullPath}\n");
            $ErrorCnt++;
            return;
        }
        # do we need to split file?
        if ($hdr->{size} < $max_file_size) {
            TarWriteFileInfo($fh, $hdr);
            my($data, $size);
            while ( $f->read(\$data, $BufSize) > 0 ) {
                TarWrite($fh, \$data);
                $size += length($data);
            }
            $f->close;
            TarWritePad($fh, $size);

            $FileCnt++;
            $ByteCnt += $size;
        } else {
            my $full_size = $hdr->{size};
            my $orig_name = $hdr->{name};
            my $max_part_size = $max_file_size - tar_overhead($hdr->{name});

            my $parts = int(($full_size + $max_part_size - 1) / $max_part_size);
            print STDERR "# splitting $orig_name [$full_size bytes] into $parts parts\n"
                if ($opts{d});
            foreach my $subpart ( 1 .. $parts ) {
                new_tar_part();
                if ($subpart < $parts) {
                    $hdr->{size} = $max_part_size;
                } else {
                    # last part gets the remainder (a full part if the
                    # size is an exact multiple of $max_part_size)
                    $hdr->{size} = $full_size % $max_part_size;
                    $hdr->{size} = $max_part_size if ($hdr->{size} == 0);
                }
                $hdr->{name} = $orig_name . '/' . $subpart;
                print STDERR "## creating part $subpart ",
                    $hdr->{name}, " [", $hdr->{size}, " bytes]\n";

                TarWriteFileInfo($fh, $hdr);
                my($data, $size);
                # read this part in full $BufSize chunks...  (this loop
                # was wrapped in "if (0)" in the original, which left
                # most of each part unread and produced corrupt parts)
                for ( 1 .. int($hdr->{size} / $BufSize) ) {
                    my $r_size = $f->read(\$data, $BufSize);
                    die "expected $BufSize bytes read, got $r_size bytes!"
                        if ($r_size != $BufSize);
                    TarWrite($fh, \$data);
                    $size += length($data);
                }
                # ...and then the remainder
                my $size_left = $hdr->{size} % $BufSize;
                my $r_size = $f->read(\$data, $size_left);
                die "expected $size_left bytes last read, got $r_size bytes!"
                    if ($r_size != $size_left);

                TarWrite($fh, \$data);
                $size += length($data);
                TarWritePad($fh, $size);

                $items_in_part++;
            }
            $f->close;
            $FileCnt++;
            $ByteCnt += $full_size;
            new_tar_part();
        }
    } elsif ( $hdr->{type} == BPC_FTYPE_HARDLINK ) {
        #
        # Hardlink file: either write a hardlink or the complete file
        # depending upon whether the linked-to file will be written
        # to the archive.
        #
        # Start by reading the contents of the link.
        #
        my $f = BackupPC::FileZIO->open($hdr->{fullPath}, 0, $hdr->{compress});
        if ( !defined($f) ) {
            print(STDERR "Unable to open file $hdr->{fullPath}\n");
            $ErrorCnt++;
            return;
        }
        my $data;
        while ( $f->read(\$data, $BufSize) > 0 ) {
            $hdr->{linkname} .= $data;
        }
        $f->close;
        my $done = 0;
        my $name = $hdr->{linkname};
        $name =~ s{^\./}{/};
        if ( $HardLinkExtraFiles{$name} ) {
            #
            # Target file will be or was written, so just remember
            # the hardlink so we can dump it later.
            #
            push(@HardLinks, $hdr);
            $SpecialCnt++;
        } else {
            #
            # Have to dump the original file.  Just call the top-level
            # routine, so that we save the hassle of dealing with
            # mangling, merging and attributes.
            #
            $HardLinkExtraFiles{$hdr->{linkname}} = 1;
            archiveWrite($fh, $hdr->{linkname}, $hdr->{name});
        }
    } elsif ( $hdr->{type} == BPC_FTYPE_SYMLINK ) {
        #
        # Symbolic link: read the symbolic link contents into the header
        # and write the header.
        #
        my $f = BackupPC::FileZIO->open($hdr->{fullPath}, 0, $hdr->{compress});
        if ( !defined($f) ) {
            print(STDERR "Unable to open symlink file $hdr->{fullPath}\n");
            $ErrorCnt++;
            return;
        }
        my $data;
        while ( $f->read(\$data, $BufSize) > 0 ) {
            $hdr->{linkname} .= $data;
        }
        $f->close;
        $hdr->{size} = 0;
        TarWriteFileInfo($fh, $hdr);
        $SpecialCnt++;
    } elsif ( $hdr->{type} == BPC_FTYPE_CHARDEV
           || $hdr->{type} == BPC_FTYPE_BLOCKDEV
           || $hdr->{type} == BPC_FTYPE_FIFO ) {
        #
        # Special files: for char and block special we read the
        # major and minor numbers from a plain file.
        #
        if ( $hdr->{type} != BPC_FTYPE_FIFO ) {
            my $f = BackupPC::FileZIO->open($hdr->{fullPath}, 0,
                                            $hdr->{compress});
            my $data;
            if ( !defined($f) || $f->read(\$data, $BufSize) < 0 ) {
                print(STDERR "Unable to open/read char/block special file"
                           . " $hdr->{fullPath}\n");
                $f->close if ( defined($f) );
                $ErrorCnt++;
                return;
            }
            $f->close;
            if ( $data =~ /(\d+),(\d+)/ ) {
                $hdr->{devmajor} = $1;
                $hdr->{devminor} = $2;
            }
        }
        $hdr->{size} = 0;
        TarWriteFileInfo($fh, $hdr);
        $SpecialCnt++;
    } else {
        print(STDERR "Got unknown type $hdr->{type} for $hdr->{name}\n");
        $ErrorCnt++;
    }
}

my $t_fmt = '%Y-%m-%d %H:%M:%S';
sub curr_time {
    return strftime($t_fmt, localtime());
}
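
#
# NOTE on restoring (informational only, based on the code above):
# files larger than MaxArchiveFileSize appear inside the archive as
# name/1, name/2, ..., name/N and have to be concatenated in order
# after extraction to rebuild the original file.  Each .tar.gz part
# written to disk can be verified against the checksum recorded in
# its companion .md5 file, e.g. compare the output of
#
#   md5sum 1.tar.gz
#
# with the hash stored in 1.md5 (the .md5 file names the stream "-"
# because md5sum read it from the tee pipe).
#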