-#!/usr/bin/perl
+#!/usr/bin/perl -w
#============================================================= -*-perl-*-
#
# BackupPC_tarIncCreate: create a tar archive of an existing incremental dump
use BackupPC::SearchLib;
use Time::HiRes qw/time/;
use POSIX qw/strftime/;
+use File::Which;
+use File::Path;
+use File::Slurp;
use Data::Dumper; ### FIXME
die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
my %Conf = $bpc->Conf();
+%BackupPC::SearchLib::Conf = %Conf;
my %opts;
my $in_backup_increment;
-if ( !getopts("th:n:p:r:s:b:w:v", \%opts) ) {
+if ( !getopts("th:n:p:r:s:b:w:vdf", \%opts) ) {
print STDERR <<EOF;
usage: $0 [options]
Required options:
-p pathAdd      new path prefix
-b BLOCKS       BLOCKS x 512 bytes per record (default 20; same as tar)
-w writeBufSz   write buffer size (default 1048576 = 1MB)
+ -f            overwrite existing parts
-v              verbose output
+ -d            debug output
EOF
exit(1);
}
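+# example invocation (host, share and backup number are illustrative):
+#   BackupPC_tarIncCreate -h somehost -s /home -n 42 -v -d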
if ( $opts{h} !~ /^([\w\.\s-]+)$/ ) {
- print(STDERR "$0: bad host name '$opts{h}'\n");
- exit(1);
+ die "$0: bad host name '$opts{h}'\n";
}
my $Host = $opts{h};
if ( $opts{n} !~ /^(-?\d+)$/ ) {
- print(STDERR "$0: bad dump number '$opts{n}'\n");
- exit(1);
+ die "$0: bad dump number '$opts{n}'\n";
}
my $Num = $opts{n};
+my $bin;
+foreach my $c (qw/gzip md5sum tee/) {
+ $bin->{$c} = which($c) || die "$0 needs $c, install it\n";
+}
+
my @Backups = $bpc->BackupInfoRead($Host);
my $FileCnt = 0;
my $ByteCnt = 0;
my $DirCnt = 0;
my $SpecialCnt = 0;
my $ErrorCnt = 0;
+my $current_tar_size = 0;
+my $total_increment_size = 0;
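+# $current_tar_size counts uncompressed bytes written into the part that is
+# currently open; $total_increment_size sums the compressed on-disk size of
+# all finished parts and is stored in backups.inc_size at the end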
my $i;
$Num = $Backups[@Backups + $Num]{num} if ( -@Backups <= $Num && $Num < 0 );
last if ( $Backups[$i]{num} == $Num );
}
if ( $i >= @Backups ) {
- print(STDERR "$0: bad backup number $Num for host $Host\n");
- exit(1);
+ die "$0: bad backup number $Num for host $Host\n";
}
my $PathRemove = $1 if ( $opts{r} =~ /(.+)/ );
my $PathAdd = $1 if ( $opts{p} =~ /(.+)/ );
if ( $opts{s} !~ /^([\w\s\.\/\$-]+)$/ && $opts{s} ne "*" ) {
- print(STDERR "$0: bad share name '$opts{s}'\n");
- exit(1);
+ die "$0: bad share name '$opts{s}'\n";
}
our $ShareName = $opts{s};
our $view = BackupPC::View->new($bpc, $Host, \@Backups);
+# database
+
+my $dsn = $Conf{SearchDSN};
+my $db_user = $Conf{SearchUser} || '';
+
+my $dbh = DBI->connect($dsn, $db_user, "", { RaiseError => 1, AutoCommit => 0} );
+
+my $sth_inc_size = $dbh->prepare(qq{
+ update backups set
+ inc_size = ?,
+ parts = ?,
+ inc_deleted = false
+ where id = ?
+});
+my $sth_backup_parts = $dbh->prepare(qq{
+ insert into backup_parts (
+ backup_id,
+ part_nr,
+ tar_size,
+ size,
+ md5,
+ items
+ ) values (?,?,?,?,?,?)
+});
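+# one backup_parts row is inserted per finished part:
+#   tar_size - uncompressed bytes written into the tar stream
+#   size     - compressed size of the part's .tar.gz file on disk
+#   md5      - md5sum of the compressed file
+#   items    - number of archive members stored in the part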
+
#
# This constant and the line of code below that uses it are borrowed
# from Archive::Tar. Thanks to Calle Dybedahl and Stephen Zander.
#
# Write out all the requested files/directories
#
-binmode(STDOUT);
-my $fh = *STDOUT;
+
+my $max_file_size = $Conf{'MaxArchiveFileSize'} || die "problem with MaxArchiveFileSize parameter";
+
+my $tar_dir = $Conf{InstallDir}.'/'.$Conf{GzipTempDir};
+die "problem with $tar_dir, check GzipTempDir in configuration\n" unless (-d $tar_dir && -w $tar_dir);
+
+my $tar_file = BackupPC::SearchLib::getGzipName($Host, $ShareName, $Num) || die "can't getGzipName($Host, $ShareName, $Num)";
+
+my $tar_path_final = $tar_dir . '/' . $tar_file;
+my $tar_path = $tar_path_final . '.tmp';
+
+$tar_path =~ s#//#/#g;
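+# parts are written into a temporary directory ($tar_path = final name + .tmp)
+# as 1.tar.gz/1.md5, 2.tar.gz/2.md5, ...; on success a single-part archive is
+# renamed to <final>.tar.gz and <final>.md5, while a multi-part archive keeps
+# the whole directory, which is renamed to <final>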
+
+my $sth = $dbh->prepare(qq{
+ SELECT
+ backups.id
+ FROM backups
+ JOIN shares on shares.id = shareid
+ JOIN hosts on hosts.id = shares.hostid
+ WHERE hosts.name = ? and shares.name = ? and backups.num = ?
+});
+$sth->execute($Host, $ShareName, $Num);
+my ($backup_id) = $sth->fetchrow_array;
+$sth->finish;
+
+
+# delete existing backup_parts
+my $sth_delete_backup_parts = $dbh->prepare(qq{
+ delete from backup_parts
+ where backup_id = ?
+});
+$sth_delete_backup_parts->execute($backup_id);
+
+
+print STDERR "backup_id: $backup_id working dir: $tar_dir, max uncompressed size $max_file_size bytes, tar $tar_file\n" if ($opts{d});
+
+if (-e $tar_path_final) {
+ if ($opts{f}) {
+ rmtree($tar_path_final) || die "can't remove $tar_path_final: $!";
+ } else {
+ die "$tar_path_final already exists\n";
+ }
+}
+
+my $fh;
+my $part = 0;
+my $no_files = 0;
+my $items_in_part = 0;
+
+sub new_tar_part {
+ my $arg = {@_};
+
+ if ($fh) {
+ return if ($current_tar_size == 0);
+
+ print STDERR " $part" if ($opts{v});
+
+ #
+ # Finish with two null 512 byte headers,
+ # and then round out a full block.
+ #
+ my $data = "\0" x ($tar_header_length * 2);
+ TarWrite($fh, \$data);
+ TarWrite($fh, undef);
+
+ close($fh) || die "can't close archive part $part: $!";
+
+ my $file = $tar_path . '/' . $part;
+
+ my $md5 = read_file( $file . '.md5' ) || die "can't read md5sum file ${file}.md5";
+ $md5 =~ s/\s.*$//;
+
+ my $size = (stat( $file . '.tar.gz' ))[7] || die "can't stat ${file}.tar.gz";
+
+ $sth_backup_parts->execute(
+ $backup_id,
+ $part,
+ $current_tar_size,
+ $size,
+ $md5,
+ $items_in_part,
+ );
+
+ $total_increment_size += $size;
+
+ if ($arg->{close}) {
+
+ sub move($$) {
+ my ($from,$to) = @_;
+ print STDERR "# rename $from -> $to\n" if ($opts{d});
+ rename($from, $to) || die "can't move $from -> $to: $!\n";
+ }
+
+ if ($part == 1) {
+ print STDERR " single" if ($opts{v});
+ move("${tar_path}/1.tar.gz", "${tar_path_final}.tar.gz");
+ move("${tar_path}/1.md5", "${tar_path_final}.md5");
+ rmtree $tar_path or die "can't remove temporary dir $tar_path: $!";
+ } else {
+ print STDERR " [last]" if ($opts{v});
+ move("${tar_path}", "${tar_path_final}");
+
+ # remove any leftover single-part archive files from a previous run
+ foreach my $suffix (qw/.tar.gz .md5/) {
+ my $path = $tar_path_final . $suffix;
+ unlink $path if (-e $path);
+ }
+ }
+
+ $sth_inc_size->execute(
+ $total_increment_size,
+ $part,
+ $backup_id
+ );
+
+ print STDERR ", $total_increment_size bytes\n" if ($opts{v});
+
+ return;
+ }
+
+ }
+
+ $part++;
+
+ # if this is first part, create directory
+
+ if ($part == 1) {
+ if (-e $tar_path) {
+ print STDERR "# deleting existing $tar_path\n" if ($opts{d});
+ rmtree($tar_path);
+ }
+ mkdir($tar_path) || die "can't create directory $tar_path: $!";
+
+ sub abort_cleanup {
+ print STDERR "ABORTED: cleaning up temp dir\n";
+ rmtree($tar_path);
+ $dbh->rollback;
+ exit 1;
+ }
+
+ $SIG{'INT'} = \&abort_cleanup;
+ $SIG{'QUIT'} = \&abort_cleanup;
+ $SIG{'__DIE__'} = \&abort_cleanup;
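+ # from this point on any interrupt or die() removes the partially
+ # written temp directory and rolls back the database transaction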
+
+ }
+
+ my $file = $tar_path . '/' . $part;
+
+ #
+ # create a complex pipe which passes the output through gzip for
+ # compression, writes the compressed file to disk using tee,
+ # and pipes the same stream to md5sum to create a checksum
+ #
+
+ my $cmd = '| ' . $bin->{'gzip'} . ' ' . $Conf{GzipLevel} . ' ' .
+ '| ' . $bin->{'tee'} . ' ' . $file . '.tar.gz' . ' ' .
+ '| ' . $bin->{'md5sum'} . ' - > ' . $file . '.md5';
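+ # the assembled command looks roughly like this (binary paths and gzip
+ # level depend on the local system and configuration):
+ #   | /bin/gzip -9 | /usr/bin/tee /path/to/tmp/1.tar.gz | /usr/bin/md5sum - > /path/to/tmp/1.md5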
+
+ print STDERR "## $cmd\n" if ($opts{d});
+
+ open($fh, $cmd) or die "can't open $cmd: $!";
+ binmode($fh);
+
+ $current_tar_size = 0;
+ $items_in_part = 0;
+}
+
+new_tar_part();
if (seedCache($Host, $ShareName, $Num)) {
archiveWrite($fh, '/');
archiveWriteHardLinks($fh);
+ new_tar_part( close => 1 );
} else {
- print STDERR "NOTE: no files found for $Host:$ShareName, increment $Num\n";
+ print STDERR "NOTE: no files found for $Host:$ShareName, increment $Num\n" if ($opts{v});
+ # remove temporary files if there are no files
+ rmtree($tar_path);
}
-#
-# Finish with two null 512 byte headers, and then round out a full
-# block.
-#
-my $data = "\0" x ($tar_header_length * 2);
-TarWrite($fh, \$data);
-TarWrite($fh, undef);
-
#
# print out totals if requested
#
# Got errors, with no files or directories; exit with non-zero
# status
#
- exit(1);
+ die "got errors or no files\n";
}
-exit(0);
+
+$sth_inc_size->finish;
+$sth_backup_parts->finish;
+
+$dbh->commit || die "can't commit changes to database";
+$dbh->disconnect();
+
+exit;
###########################################################################
# Subroutines
{
my($fh, $dataRef) = @_;
+
if ( !defined($dataRef) ) {
#
# do flush by padding to a full $WriteBufSz
my $data = "\0" x ($WriteBufSz - length($WriteBuf));
$dataRef = \$data;
}
+
+ # poor man's tell :-)
+ $current_tar_size += length($$dataRef);
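+ # tar headers and padding also pass through TarWrite, so this total
+ # reflects the full uncompressed tar stream of the current part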
+
if ( length($WriteBuf) + length($$dataRef) < $WriteBufSz ) {
#
# just buffer and return
my $done = $WriteBufSz - length($WriteBuf);
if ( syswrite($fh, $WriteBuf . substr($$dataRef, 0, $done))
!= $WriteBufSz ) {
- print(STDERR "Unable to write to output file ($!)\n");
- exit(1);
+ die "Unable to write to output file ($!)\n";
}
while ( $done + $WriteBufSz <= length($$dataRef) ) {
if ( syswrite($fh, substr($$dataRef, $done, $WriteBufSz))
!= $WriteBufSz ) {
- print(STDERR "Unable to write to output file ($!)\n");
- exit(1);
+ die "Unable to write to output file ($!)\n";
}
$done += $WriteBufSz;
}
sub seedCache($$$) {
my ($host, $share, $dumpNo) = @_;
- my $dsn = $Conf{SearchDSN};
- my $db_user = $Conf{SearchUser} || '';
-
- print STDERR curr_time(), "getting files for $host:$share increment $dumpNo..." if ($opts{v});
+ print STDERR curr_time(), "$host:$share #$dumpNo" if ($opts{v});
my $sql = q{
- SELECT path
+ SELECT path,size
FROM files
JOIN shares on shares.id = shareid
JOIN hosts on hosts.id = shares.hostid
WHERE hosts.name = ? and shares.name = ? and backupnum = ?
};
- my $dbh = DBI->connect($dsn, $db_user, "", { RaiseError => 1, AutoCommit => 1} );
my $sth = $dbh->prepare($sql);
$sth->execute($host, $share, $dumpNo);
my $count = $sth->rows;
- print STDERR " found $count items\n" if ($opts{v});
+ print STDERR " $count items, parts:" if ($opts{v});
while (my $row = $sth->fetchrow_arrayref) {
#print STDERR "+ ", $row->[0],"\n";
- $in_backup_increment->{ $row->[0] }++;
+ $in_backup_increment->{ $row->[0] } = $row->[1];
}
$sth->finish();
- $dbh->disconnect();
return $count;
}
+#
+# calculate overhead for one file in tar
+#
+sub tar_overhead($) {
+ my $name = shift || '';
+
+ # header, padding of file and two null blocks at end
+ my $len = 4 * $tar_header_length;
+
+ # if the filename is longer than 99 chars, add blocks for the
+ # long-filename record
+ if ( length($name) > 99 ) {
+ $len += int( ( length($name) + $tar_header_length ) / $tar_header_length ) * $tar_header_length;
+ }
+
+ return $len;
+}
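+# e.g. a name of up to 99 chars costs 4 tar blocks (2048 bytes with the
+# standard 512-byte header); a 150-char name adds one more 512-byte block
+# for the long-filename record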
+
my $Attr;
my $AttrDir;
$tarPath =~ s{//+}{/}g;
-#print STDERR "? $tarPath\n";
- return unless ($in_backup_increment->{$tarPath});
-#print STDERR "A $tarPath\n";
+ #print STDERR "? $tarPath\n" if ($opts{d});
+ my $size = $in_backup_increment->{$tarPath};
+ return unless (defined($size));
+
+ # is this file too large to fit into MaxArchiveFileSize?
+
+ if ( ($current_tar_size + tar_overhead($tarPath) + $size) > $max_file_size ) {
+ print STDERR "# tar file $current_tar_size + ", tar_overhead($tarPath), " + $size > $max_file_size, splitting\n" if ($opts{d});
+ new_tar_part();
+ }
+
+ #print STDERR "A $tarPath [$size] tell: $current_tar_size\n" if ($opts{d});
+ $items_in_part++;
if ( defined($PathRemove)
&& substr($tarPath, 0, length($PathRemove)) eq $PathRemove ) {
#
# Directory: just write the header
#
-
-
$hdr->{name} .= "/" if ( $hdr->{name} !~ m{/$} );
TarWriteFileInfo($fh, $hdr);
$DirCnt++;
$ErrorCnt++;
return;
}
- TarWriteFileInfo($fh, $hdr);
- my($data, $size);
- while ( $f->read(\$data, $BufSize) > 0 ) {
- TarWrite($fh, \$data);
- $size += length($data);
- }
- $f->close;
- TarWritePad($fh, $size);
+ # do we need to split file?
+ if ($hdr->{size} < $max_file_size) {
+ TarWriteFileInfo($fh, $hdr);
+ my($data, $size);
+ while ( $f->read(\$data, $BufSize) > 0 ) {
+ TarWrite($fh, \$data);
+ $size += length($data);
+ }
+ $f->close;
+ TarWritePad($fh, $size);
$FileCnt++;
$ByteCnt += $size;
+ } else {
+ my $full_size = $hdr->{size};
+ my $orig_name = $hdr->{name};
+ my $max_part_size = $max_file_size - tar_overhead($hdr->{name});
+
+ my $parts = int(($full_size + $max_part_size - 1) / $max_part_size);
+ print STDERR "# splitting $orig_name [$full_size bytes] into $parts parts\n" if ($opts{d});
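+ # for example (illustrative numbers): a 250 MB file with a 100 MB
+ # MaxArchiveFileSize is stored as three members <name>/1, <name>/2
+ # and <name>/3, each opening a fresh tar part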
+ foreach my $subpart ( 1 .. $parts ) {
+ new_tar_part();
+ if ($subpart < $parts) {
+ $hdr->{size} = $max_part_size;
+ } else {
+ # last part gets the remainder (avoids a zero-size part when
+ # $full_size is an exact multiple of $max_part_size)
+ $hdr->{size} = $full_size - $max_part_size * ($parts - 1);
+ }
+ $hdr->{name} = $orig_name . '/' . $subpart;
+ print STDERR "## creating part $subpart ", $hdr->{name}, " [", $hdr->{size}, " bytes]\n" if ($opts{d});
+
+ TarWriteFileInfo($fh, $hdr);
+ my($data, $size);
+ # write the bulk of this part in $BufSize chunks
+ for ( 1 .. int($hdr->{size} / $BufSize) ) {
+ my $r_size = $f->read(\$data, $BufSize);
+ die "expected $BufSize bytes read, got $r_size bytes!" if ($r_size != $BufSize);
+ TarWrite($fh, \$data);
+ $size += length($data);
+ }
+ # then the remainder, if any
+ my $size_left = $hdr->{size} % $BufSize;
+ if ($size_left > 0) {
+ my $r_size = $f->read(\$data, $size_left);
+ die "expected $size_left bytes last read, got $r_size bytes!" if ($r_size != $size_left);
+ TarWrite($fh, \$data);
+ $size += length($data);
+ }
+ TarWritePad($fh, $size);
+
+ $items_in_part++;
+ }
+ $f->close;
+ $FileCnt++;
+ $ByteCnt += $full_size;
+ new_tar_part();
+ }
} elsif ( $hdr->{type} == BPC_FTYPE_HARDLINK ) {
#
# Hardlink file: either write a hardlink or the complete file
- # depending upon whether the linked-to file will be written
- # to the archive.
+ # depending upon whether the linked-to file will be written
+ # to the archive.
#
- # Start by reading the contents of the link.
- #
+ # Start by reading the contents of the link.
+ #
my $f = BackupPC::FileZIO->open($hdr->{fullPath}, 0, $hdr->{compress});
if ( !defined($f) ) {
print(STDERR "Unable to open file $hdr->{fullPath}\n");
while ( $f->read(\$data, $BufSize) > 0 ) {
$hdr->{linkname} .= $data;
}
- $f->close;
- my $done = 0;
- my $name = $hdr->{linkname};
- $name =~ s{^\./}{/};
- if ( $HardLinkExtraFiles{$name} ) {
- #
- # Target file will be or was written, so just remember
- # the hardlink so we can dump it later.
- #
- push(@HardLinks, $hdr);
- $SpecialCnt++;
- } else {
- #
- # Have to dump the original file. Just call the top-level
- # routine, so that we save the hassle of dealing with
- # mangling, merging and attributes.
- #
- $HardLinkExtraFiles{$hdr->{linkname}} = 1;
- archiveWrite($fh, $hdr->{linkname}, $hdr->{name});
- }
+ $f->close;
+ my $done = 0;
+ my $name = $hdr->{linkname};
+ $name =~ s{^\./}{/};
+ if ( $HardLinkExtraFiles{$name} ) {
+ #
+ # Target file will be or was written, so just remember
+ # the hardlink so we can dump it later.
+ #
+ push(@HardLinks, $hdr);
+ $SpecialCnt++;
+ } else {
+ #
+ # Have to dump the original file. Just call the top-level
+ # routine, so that we save the hassle of dealing with
+ # mangling, merging and attributes.
+ #
+ $HardLinkExtraFiles{$hdr->{linkname}} = 1;
+ archiveWrite($fh, $hdr->{linkname}, $hdr->{name});
+ }
} elsif ( $hdr->{type} == BPC_FTYPE_SYMLINK ) {
#
# Symbolic link: read the symbolic link contents into the header