From 0c744034b07a7e2b6e395adac113e946dba53924 Mon Sep 17 00:00:00 2001
From: dpavlin
Date: Tue, 8 Nov 2005 20:24:45 +0000
Subject: [PATCH] r8745@llin: dpavlin | 2005-11-08 21:24:32 +0100

rewrote creation of tar archives. gzip is now called directly from
BackupPC_tarIncCreate. It uses multiple pipes to create the .tar.gz and its
md5sum on the fly, and supports MaxArchiveFileSize as the maximum size of the
UNCOMPRESSED archive (to facilitate decompression on filesystems with limited
file size). For that, there are two split implementations:

- one which splits multiple files (each smaller than MaxArchiveFileSize)
  across multiple tar archives
- the other, which splits individual files (larger than MaxArchiveFileSize)
  into multiple tar archives (the directory is named like the file, and the
  files inside are named by part number)

git-svn-id: svn+ssh://llin/home/dpavlin/private/svn/BackupPC/trunk@234 8392b6e1-25fa-0310-8288-cc32f8e212ea
---
 BUGS                      |   1 +
 Makefile                  |   3 +
 bin/BackupPC_tarIncCreate | 245 +++++++++++++++++++++++++++++++-------
 conf/config.pl            |  18 ++-
 4 files changed, 219 insertions(+), 48 deletions(-)

diff --git a/BUGS b/BUGS
index 07cd3c1..3bad8b5 100644
--- a/BUGS
+++ b/BUGS
@@ -19,6 +19,7 @@
 - add checkbox in search for files which are not burned
   + from/to date same no results
   + sort burn archive DVDs by share and other columns
+- convert size options in configuration files to bytes
 
 Site-specific:
 
diff --git a/Makefile b/Makefile
index e2cbf33..041e2d4 100644
--- a/Makefile
+++ b/Makefile
@@ -61,6 +61,9 @@ profile: test
 	DBI_PROFILE=2 sudo -u backuppc /data/backuppc/bin/BackupPC_updatedb -d $(profile_opt) 2>&1 | tee -a $(profile_file)
 	mv $(profile_file) profile.`perl -e 'my @p = glob("profile.[0-9]*"); print scalar @p + 1'`
 
+tar: test
+	sudo -u backuppc /data/backuppc/bin/BackupPC_tarIncCreate -h llin -s /etc -n 10 -d -v
+
 inc: test
 	sudo -u backuppc /data/backuppc/bin/BackupPC_incPartsUpdate -c -d
 
diff --git a/bin/BackupPC_tarIncCreate b/bin/BackupPC_tarIncCreate
index 90e5589..18cf63c 100755
--- a/bin/BackupPC_tarIncCreate
+++ b/bin/BackupPC_tarIncCreate
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
 #============================================================= -*-perl-*-
 #
 # BackupPC_tarIncCreate: create a tar archive of an existing incremental dump
 
@@ -72,17 +72,20 @@ use BackupPC::View;
 use BackupPC::SearchLib;
 use Time::HiRes qw/time/;
 use POSIX qw/strftime/;
+use File::Which;
+use File::Path;
 use Data::Dumper;	### FIXME
 
 die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
 my $TopDir = $bpc->TopDir();
 my $BinDir = $bpc->BinDir();
 my %Conf   = $bpc->Conf();
+%BackupPC::SearchLib::Conf = %Conf;
 my %opts;
 my $in_backup_increment;
 
-if ( !getopts("th:n:p:r:s:b:w:v", \%opts) ) {
+if ( !getopts("th:n:p:r:s:b:w:vd", \%opts) ) {
     print STDERR <<EOF;
...
EOF
     exit(1);
 }
 
+my $bin;
+foreach my $c (qw/gzip md5sum tee/) {
+	$bin->{$c} = which($c) || die "$0 needs $c, install it\n";
+}
+
 my @Backups = $bpc->BackupInfoRead($Host);
 my $FileCnt    = 0;
 my $ByteCnt    = 0;
 my $DirCnt     = 0;
 my $SpecialCnt = 0;
 my $ErrorCnt   = 0;
+my $current_tar_size = 0;
 
 my $i;
 $Num = $Backups[@Backups + $Num]{num} if ( -@Backups <= $Num && $Num < 0 );
@@ -163,14 +173,77 @@ my(%HardLinkExtraFiles, @HardLinks);
 #
 # Write out all the requested files/directories
 #
-binmode(STDOUT);
-my $fh = *STDOUT;
+
+my $max_file_size = $Conf{'MaxArchiveFileSize'} || die "problem with MaxArchiveFileSize parameter";
+$max_file_size *= 1024;
+
+my $tar_dir = $Conf{InstallDir}.'/'.$Conf{GzipTempDir};
+die "problem with $tar_dir, check GzipTempDir in configuration\n" unless (-d $tar_dir && -w $tar_dir);
+
+my $tar_file = BackupPC::SearchLib::getGzipName($Host, $ShareName, $Num) || die "can't getGzipName($Host, $ShareName, $Num)";
+
+my $tar_path = $tar_dir . '/' . $tar_file . '.tmp';
+$tar_path =~ s#//#/#g;
+
+print STDERR "working dir: $tar_dir, max uncompressed size $max_file_size bytes, tar $tar_file\n" if ($opts{d});
+
+my $fh;
+my $part = 0;
+my $no_files = 0;
+
+sub new_tar_part {
+	if ($fh) {
+		return if ($current_tar_size == 0);
+
+		print STDERR "# closing part $part\n" if ($opts{d});
+
+		# finish tar archive
+		my $data = "\0" x ($tar_header_length * 2);
+		TarWrite($fh, \$data);
+		TarWrite($fh, undef);
+
+		close($fh) || die "can't close archive part $part: $!";
+	}
+
+	$part++;
+
+	# if this is the first part, create the directory
+
+	if ($part == 1) {
+		if (-d $tar_path) {
+			print STDERR "# deleting existing $tar_path\n" if ($opts{d});
+			rmtree($tar_path);
+		}
+		mkdir($tar_path) || die "can't create directory $tar_path: $!";
+	}
+
+	my $file = $tar_path . '/' . $part;
+
+	#
+	# create a complex pipe which passes output through gzip
+	# for compression, writes the file to disk using tee,
+	# and pipes the same output to md5sum to create a checksum
+	#
+
+	my $cmd = '| ' . $bin->{'gzip'} . ' ' . $Conf{GzipLevel} . ' ' .
+		'| ' . $bin->{'tee'} . ' ' . $file . '.tar.gz' . ' ' .
+		'| ' . $bin->{'md5sum'} . ' - > ' . $file . '.md5';
+
+	print STDERR "## $cmd\n" if ($opts{d});
+
+	open($fh, $cmd) or die "can't open $cmd: $!";
+	binmode($fh);
+	$current_tar_size = 0;
+}
+
+new_tar_part();
 
 if (seedCache($Host, $ShareName, $Num)) {
     archiveWrite($fh, '/');
     archiveWriteHardLinks($fh);
 } else {
     print STDERR "NOTE: no files found for $Host:$ShareName, increment $Num\n" if ($opts{v});
+    $no_files = 1;
 }
 
 #
@@ -181,6 +254,18 @@ my $data = "\0" x ($tar_header_length * 2);
 TarWrite($fh, \$data);
 TarWrite($fh, undef);
 
+if (! close($fh)) {
+	rmtree($tar_path);
+	die "can't close archive\n";
+}
+
+# remove temporary files if there are no files
+if ($no_files) {
+	rmtree($tar_path);
+} elsif ($part == 1) {
+	warn "FIXME: if there is only one part move to parent directory and rename";
+}
+
 #
 # print out totals if requested
 #
@@ -193,8 +278,10 @@ if ( $ErrorCnt && !$FileCnt && !$DirCnt ) {
     #
     # Got errors, with no files or directories; exit with non-zero
     # status
     #
+    cleanup();
     exit(1);
 }
+
 exit(0);
 
 ###########################################################################
@@ -260,6 +347,7 @@ sub TarWrite
 {
     my($fh, $dataRef) = @_;
 
+
     if ( !defined($dataRef) ) {
         #
         # do flush by padding to a full $WriteBufSz
@@ -267,6 +355,10 @@
         my $data = "\0" x ($WriteBufSz - length($WriteBuf));
         $dataRef = \$data;
     }
+
+    # poor man's tell :-)
+    $current_tar_size += length($$dataRef);
+
     if ( length($WriteBuf) + length($$dataRef) < $WriteBufSz ) {
         #
         # just buffer and return
@@ -394,7 +486,7 @@ sub seedCache($$$) {
     print STDERR curr_time(), "getting files for $host:$share increment $dumpNo..."
         if ($opts{v});
     my $sql = q{
-        SELECT path
+        SELECT path,size
         FROM files
         JOIN shares on shares.id = shareid
         JOIN hosts on hosts.id = shares.hostid
@@ -408,7 +500,7 @@ sub seedCache($$$) {
     print STDERR " found $count items\n" if ($opts{v});
     while (my $row = $sth->fetchrow_arrayref) {
 #print STDERR "+ ", $row->[0],"\n";
-        $in_backup_increment->{ $row->[0] }++;
+        $in_backup_increment->{ $row->[0] } = $row->[1];
     }
 
     $sth->finish();
@@ -417,6 +509,24 @@ sub seedCache($$$) {
     return $count;
 }
 
+#
+# calculate overhead for one file in tar
+#
+sub tar_overhead($) {
+	my $name = shift || '';
+
+	# header, padding of file and two null blocks at the end
+	my $len = 4 * $tar_header_length;
+
+	# if the filename is longer than 99 chars, add extra blocks
+	# for the long filename
+	if ( length($name) > 99 ) {
+		$len += int( ( length($name) + $tar_header_length ) / $tar_header_length ) * $tar_header_length;
+	}
+
+	return $len;
+}
+
 my $Attr;
 my $AttrDir;
 
@@ -429,9 +539,18 @@ sub TarWriteFile
 
     $tarPath =~ s{//+}{/}g;
 
-#print STDERR "? $tarPath\n";
-    return unless ($in_backup_increment->{$tarPath});
-#print STDERR "A $tarPath\n";
+    #print STDERR "? $tarPath\n" if ($opts{d});
+    my $size = $in_backup_increment->{$tarPath};
+    return unless (defined($size));
+
+    # is this file too large to fit into MaxArchiveFileSize?
+
+    if ( ($current_tar_size + tar_overhead($tarPath) + $size) > $max_file_size ) {
+        print STDERR "# tar file $current_tar_size + $size (+ overhead) > $max_file_size, splitting\n" if ($opts{d});
+        new_tar_part();
+    }
+
+    print STDERR "A $tarPath [$size] tell: $current_tar_size\n" if ($opts{d});
 
     if ( defined($PathRemove)
             && substr($tarPath, 0, length($PathRemove)) eq $PathRemove ) {
@@ -445,8 +564,6 @@
         #
         # Directory: just write the header
         #
-
-
         $hdr->{name} .= "/" if ( $hdr->{name} !~ m{/$} );
         TarWriteFileInfo($fh, $hdr);
         $DirCnt++;
@@ -460,24 +577,66 @@
             $ErrorCnt++;
             return;
         }
-        TarWriteFileInfo($fh, $hdr);
-        my($data, $size);
-        while ( $f->read(\$data, $BufSize) > 0 ) {
-            TarWrite($fh, \$data);
-            $size += length($data);
-        }
-        $f->close;
-        TarWritePad($fh, $size);
+        # do we need to split the file?
+        if ($hdr->{size} < $max_file_size) {
+            TarWriteFileInfo($fh, $hdr);
+            my($data, $size);
+            while ( $f->read(\$data, $BufSize) > 0 ) {
+                TarWrite($fh, \$data);
+                $size += length($data);
+            }
+            $f->close;
+            TarWritePad($fh, $size);
         $FileCnt++;
         $ByteCnt += $size;
+        } else {
+            my $full_size = $hdr->{size};
+            my $orig_name = $hdr->{name};
+            my $max_part_size = $max_file_size - tar_overhead($hdr->{name});
+
+            my $parts = int(($full_size + $max_part_size - 1) / $max_part_size);
+            print STDERR "# splitting $orig_name [$full_size bytes] into $parts parts\n" if ($opts{d});
+            foreach my $subpart ( 1 .. $parts ) {
+                new_tar_part();
+                if ($subpart < $parts) {
+                    $hdr->{size} = $max_part_size;
+                } else {
+                    # the last part gets whatever is left over
+                    $hdr->{size} = $full_size - $max_part_size * ($parts - 1);
+                }
+                $hdr->{name} = $orig_name . '/' . $subpart;
+                print STDERR "## creating part $subpart ",$hdr->{name}, " [", $hdr->{size}," bytes]\n" if ($opts{d});
+
+                TarWriteFileInfo($fh, $hdr);
+                my($data, $size);
+                # first copy as many full buffers as fit into this part
+                for ( 1 .. int($hdr->{size} / $BufSize) ) {
+                    my $r_size = $f->read(\$data, $BufSize);
+                    die "expected $BufSize bytes read, got $r_size bytes!" if ($r_size != $BufSize);
+                    TarWrite($fh, \$data);
+                    $size += length($data);
+                }
+                # then the remainder of this part, if any
+                my $size_left = $hdr->{size} % $BufSize;
+                if ($size_left > 0) {
+                    my $r_size = $f->read(\$data, $size_left);
+                    die "expected $size_left bytes last read, got $r_size bytes!"
+                        if ($r_size != $size_left);
+
+                    TarWrite($fh, \$data);
+                    $size += length($data);
+                }
+                TarWritePad($fh, $size);
+            }
+            $f->close;
+            $FileCnt++;
+            $ByteCnt += $full_size;
+            new_tar_part();
+        }
     } elsif ( $hdr->{type} == BPC_FTYPE_HARDLINK ) {
         #
         # Hardlink file: either write a hardlink or the complete file
-        # depending upon whether the linked-to file will be written
-        # to the archive.
+	# depending upon whether the linked-to file will be written
+	# to the archive.
         #
-        # Start by reading the contents of the link.
-        #
+	# Start by reading the contents of the link.
+	#
         my $f = BackupPC::FileZIO->open($hdr->{fullPath}, 0, $hdr->{compress});
         if ( !defined($f) ) {
             print(STDERR "Unable to open file $hdr->{fullPath}\n");
@@ -488,26 +647,26 @@ sub TarWriteFile
         while ( $f->read(\$data, $BufSize) > 0 ) {
             $hdr->{linkname} .= $data;
         }
-	$f->close;
-	my $done = 0;
-	my $name = $hdr->{linkname};
-	$name =~ s{^\./}{/};
-	if ( $HardLinkExtraFiles{$name} ) {
-	    #
-	    # Target file will be or was written, so just remember
-	    # the hardlink so we can dump it later.
-	    #
-	    push(@HardLinks, $hdr);
-	    $SpecialCnt++;
-	} else {
-	    #
-	    # Have to dump the original file.  Just call the top-level
-	    # routine, so that we save the hassle of dealing with
-	    # mangling, merging and attributes.
-	    #
-	    $HardLinkExtraFiles{$hdr->{linkname}} = 1;
-	    archiveWrite($fh, $hdr->{linkname}, $hdr->{name});
-	}
+	$f->close;
+	my $done = 0;
+	my $name = $hdr->{linkname};
+	$name =~ s{^\./}{/};
+	if ( $HardLinkExtraFiles{$name} ) {
+	    #
+	    # Target file will be or was written, so just remember
+	    # the hardlink so we can dump it later.
+	    #
+	    push(@HardLinks, $hdr);
+	    $SpecialCnt++;
+	} else {
+	    #
+	    # Have to dump the original file.  Just call the top-level
+	    # routine, so that we save the hassle of dealing with
+	    # mangling, merging and attributes.
+	    #
+	    $HardLinkExtraFiles{$hdr->{linkname}} = 1;
+	    archiveWrite($fh, $hdr->{linkname}, $hdr->{name});
+	}
     } elsif ( $hdr->{type} == BPC_FTYPE_SYMLINK ) {
         #
         # Symbolic link: read the symbolic link contents into the header
diff --git a/conf/config.pl b/conf/config.pl
index 977eebb..be1b3b3 100644
--- a/conf/config.pl
+++ b/conf/config.pl
@@ -1761,9 +1761,17 @@ $Conf{HyperEstraierIndex} = 'http://localhost:1978/node/backuppc';
 $Conf{GzipTempDir} = 'temp';
 
 #
-# schema for naming snapshot gzips
+# naming schema for snapshots (.tar.gz will be added)
+# \h - hostname
+# \s - share
+# \n - increment number
 #
-$Conf{GzipSchema} = '\h_\s_\n.tar.gz';
+$Conf{GzipSchema} = '\h_\s_\n';
+
+#
+# schema for the part which will be added to GzipSchema (before .tar.gz)
+# (if a file is bigger than MaxArchiveFileSize)
+#
+$Conf{GzipSchemaPart} = '-\p';
 
 #
 # archive media size (in kb)
@@ -1772,9 +1780,9 @@ $Conf{GzipSchema} = '\h_\s_\n.tar.gz';
 $Conf{MaxArchiveSize} = 4200 * 1024;
 
 #
-# maximum size of one file on archive medium (in kb)
-# default: 4Gb - 2k for DVD
-$Conf{MaxArchiveFileSize} = (4096 - 2) * 1024;
+# maximum size of one (uncompressed) file on archive medium (in kb)
+# default: 2Gb - 2k for DVD
+$Conf{MaxArchiveFileSize} = (2048 - 2) * 1024;
 
 #
 # Temporary directory for ISO images (relative to install dir)
-- 
2.20.1
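
The key technique in this patch is the pipeline opened in new_tar_part(): the tar stream is never written to disk uncompressed; gzip compresses it, tee stores the compressed bytes, and md5sum checksums those same compressed bytes, all in one pass. Below is a minimal stand-alone Perl sketch of that idea (not part of the patch; it assumes gzip, tee and md5sum are in $PATH, and the output base name is hypothetical):

    #!/usr/bin/perl -w
    # Sketch of the compress + store + checksum pipeline from new_tar_part().
    # Assumptions: gzip/tee/md5sum in $PATH; the base name below is made up.
    use strict;

    my $file = '/tmp/example-part';
    my $cmd  = '| gzip -9'                          # compress the stream
             . ' | tee ' . $file . '.tar.gz'        # store compressed bytes on disk
             . ' | md5sum - > ' . $file . '.md5';   # checksum the same compressed bytes

    open(my $fh, $cmd) or die "can't open pipe: $!";
    binmode($fh);
    print $fh "tar stream would go here\n";         # stand-in for TarWrite() output
    close($fh) or die "pipeline failed (status $?)";

    # The split math for one oversized file is the same as in the patch:
    # my $parts = int(($full_size + $max_part_size - 1) / $max_part_size);

Because md5sum reads the tee'd stream from stdin, the .md5 file covers the compressed .tar.gz exactly as written to disk, which is what later verification of the archive needs.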