support hostList in quotes as -h option
[BackupPC.git] / bin / BackupPC_ASA_PostArchive_Update
1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "/usr/local/BackupPC/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use BackupPC::Attrib qw/:all/;
10 #use Data::Dumper;
11 use Data::Dump qw(dump);
12 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use Cwd qw/abs_path/;
15 #use Archive::Tar::Streamed;
16 #use Algorithm::Diff;
17 use Getopt::Std;
18 use File::Slurp;
19
=head1 NAME

BackupPC_ASA_PostArchive_Update - verify archived backup parts and update the search database

=head1 DESCRIPTION

Invoked with C<name=value> style arguments (C<HostList> and C<BackupList>).
For each host/backup-number pair it checks the md5 sums of the archive part
files under C<$Conf{ArchiveDest}> and refreshes the matching C<backup_parts>
rows in the search database.

See also F</etc/BackupPC/pc/dvd_tar.pl>.

=cut
29
# FIXME: debugging and verification are enabled by default for now
my $debug = $ENV{DEBUG} || 1;
my $check = $ENV{CHECK} || 1;


my $bpc = BackupPC::Lib->new || die "can't create BackupPC::Lib";
my %Conf = $bpc->Conf();
warn "## ARGV=",dump @ARGV;


# Parse name=value arguments. A bare value (no '=') is attached to the most
# recently seen name, so a shell-quoted "HostList=a b c" arrives as several
# entries. Names matching /List/ accumulate into an arrayref; all other
# names keep only the last value.
my $args;
my $name;
foreach ( @ARGV ) {
        my $v = $_;
        if ( m/(\w+)=(.+)/ ) {
                $name = $1;
                $v = $2;
        }
        if ( ! defined $name ) {
                # previously this fell through to an uninitialized $name match;
                # skip stray leading values loudly instead
                warn "skipping argument '$v' seen before any name=value option\n";
                next;
        }
        if ( $name =~ m/List/ ) {
                push @{ $args->{$name} }, $v;
        } else {
                $args->{$name} = $v;
        }
}

warn "args = ",dump($args);
56
sub check_archive;

# Pair each host with its backup number and verify that archive.
# The two lists must line up; a mismatch previously produced an undef
# backup number deep inside check_archive, so fail fast here instead.
my $host_list   = $args->{'HostList'}   || [];
my $backup_list = $args->{'BackupList'} || [];
die "HostList and BackupList must have the same number of entries\n"
        if $#$host_list != $#$backup_list;

foreach ( 0 .. $#$host_list ) {

        check_archive $host_list->[$_] => $backup_list->[$_];

}

exit;
69
70
# NOTE(review): execution never reaches this point — the foreach above ends
# with an unconditional exit. Only the compile-time `use` takes effect; the
# rest is kept (and tidied) for reference.
use BackupPC::Search;
%BackupPC::Search::Conf = %Conf;

my $path = abs_path($0);
$path =~ s{/[^/]+$}{/}; # FIXME remove?

$|=1;   # unbuffered STDOUT so progress output appears immediately

my $start_t = time();

# timestamp format shared by curr_time() below
my $t_fmt = '%Y-%m-%d %H:%M:%S';

warn "## Conf = ",dump( \%Conf );

my $dsn  = $Conf{SearchDSN}  || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# use the validated $dsn/$user instead of reaching back into %Conf
# (previously $dsn and $user were computed but never used)
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
89
90 #---- subs ----
91
92
sub curr_time {
        # wall-clock timestamp rendered with the shared $t_fmt pattern
        my $stamp = strftime( $t_fmt, localtime() );
        return $stamp;
}
96
# Render a duration in seconds as "MM:SS", prefixed with "Hh" when the
# duration reaches a full hour. Returns nothing for zero/undef input.
sub fmt_time {
        my $seconds = shift
                or return;
        my ( $sec, $min, $hour ) = gmtime($seconds);
        my $formatted = $hour ? "${hour}h" : "";
        $formatted .= sprintf "%02d:%02d", $min, $sec;
        return $formatted;
}
105
# memoization cache: "host share num" -> backups.id
my $hsn_cache;

# Look up the backups.id for a (host, share, num) triple in the search
# database, caching the result. Returns undef when no such backup exists.
# (Prototype ($$$) removed — it only altered parsing, not validation.)
sub get_backup_id {
        my ($host, $share, $num) = @_;

        my $key = "$host $share $num";
        return $hsn_cache->{$key} if defined $hsn_cache->{$key};

        my $sth = $dbh->prepare(qq{
                SELECT 
                        backups.id
                FROM backups 
                INNER JOIN shares       ON backups.shareID=shares.ID
                INNER JOIN hosts        ON backups.hostID = hosts.ID
                WHERE hosts.name = ? and shares.name = ? and backups.num = ?
        });
        $sth->execute($host, $share, $num);
        my ($id) = $sth->fetchrow_array;

        # reuse $key instead of rebuilding the string (was duplicated)
        $hsn_cache->{$key} = $id;

        # guard against an uninitialized-value warning when no row matched
        print STDERR "# $host $share $num == ", ( defined $id ? $id : 'NULL' ), "\n" if $debug;

        return $id;
}
131
# Mark a single backup row as having its incremental archive deleted.
# Does not commit; callers control the transaction.
sub backup_inc_deleted($) {
        my ($backup_id) = @_;
        my $sth = $dbh->prepare(q{
                update backups set inc_deleted = true where id = ?
        });
        $sth->execute($backup_id);
}
141
# Run a command (list or single shell string), dying unless it exits 0.
# The failure status lives in $? after system(); $! is only meaningful when
# the command could not be started at all — the original reported $!.
sub system_ok {
        warn "## system_ok @_\n";
        system(@_) == 0
                or die "system @_ failed: exit status $? ($!)";
}
146
# Verify the on-disk archive parts for one (host, backup-number) pair and
# reconcile them with the backup_parts table. Returns 1 when everything
# matches, 0 when a part is oversized or the file lists differ.
sub check_archive {
        my ($host,$num) = @_;

        my $t = time();
        print curr_time, " check $host $num";

        # per-archive md5 manifest; drop it when empty so it is regenerated
        my $md5_path = "$Conf{ArchiveDest}/$host.$num.md5";
        unlink $md5_path if -e $md5_path && -s $md5_path == 0; # fix empty

        if ( ! -e $md5_path ) {
                system_ok "md5sum $Conf{ArchiveDest}/$host.$num.* > $md5_path";
        } else {
                system_ok "md5sum -c $md5_path" if $check;
        }

        # md5sum lines are "<digest><whitespace><path>" — index digest by path.
        # FIXME(original bug): the two fields were unpacked in the wrong order,
        # so every later $md5sum->{$path} lookup missed and died.
        my $md5sum;
        foreach ( split(/\n/, read_file $md5_path ) ) {
                my ( $md5, $path ) = split(/\s+/,$_);
                $md5sum->{$path} = $md5;
        }

        # depending on expected returned value this is used like:
        # my $uncompress_size = get_gzip_size('/full/path/to.gz');
        # my ($compress_size, $uncompress_size) = get_gzip_size('/path.gz');
        # NOTE: named subs nested in check_archive are still package-level subs.
        sub get_gzip_size {
                my $filename = shift;
                die "file $filename problem: $!" unless (-r $filename);
                # list-form pipe open avoids shell interpolation of $filename
                open(my $gzip, '-|', $Conf{GzipPath}, '-l', $filename)
                        || die "can't gzip -l $filename: $!";
                my $line = <$gzip>;
                chomp($line);
                # skip the header line gzip -l prints before the numbers
                $line = <$gzip> if ($line =~ /^\s+compressed/);
                close($gzip);   # was leaked

                if ($line =~ m/^\s+(\d+)\s+(\d+)\s+\d+\.\d+/) {
                        # list context gets (compressed, uncompressed) — the
                        # original returned an arrayref here, contradicting the
                        # documented usage above; scalar context gets just the
                        # uncompressed size
                        return wantarray ? ( $1, $2 ) : $2;
                } else {
                        die "can't find size in line: $line";
                }
        }

        # Ensure backup_parts holds a valid row for this part: keep a matching
        # row, otherwise delete the stale one and insert fresh values.
        sub check_part {
                my ($host, $share, $num, $part_nr, $tar_size, $size, $md5, $items) = @_;
                my $backup_id = get_backup_id($host, $share, $num);
                my $sth_md5 = $dbh->prepare(qq{
                        select
                                id, tar_size, size, md5, items
                        from backup_parts
                        where backup_id = ? and part_nr = ?
                });

                $sth_md5->execute($backup_id, $part_nr);

                if (my $row = $sth_md5->fetchrow_hashref) {
                        # existing row is acceptable when all values still agree
                        return if (
                                $row->{tar_size} >= $tar_size &&
                                $row->{size} == $size &&
                                $row->{md5} eq $md5 &&
                                $row->{items} == $items
                        );
                        print ", deleting invalid backup_parts $row->{id}";
                        # interpolated id comes from the database, not user input
                        $dbh->do(qq{ delete from backup_parts where id = $row->{id} });
                }
                print ", inserting new";
                my $sth_insert = $dbh->prepare(qq{
                        insert into backup_parts (
                                backup_id,
                                part_nr,
                                tar_size,
                                size,
                                md5,
                                items
                        ) values (?,?,?,?,?,?)
                });

                $sth_insert->execute($backup_id, $part_nr, $tar_size, $size, $md5, $items);
                $dbh->commit;
        }

        # archive part names relative to ArchiveDest; the glob also matches
        # the .md5 manifest itself, which must not be treated as a part
        my @tar_parts =
                sort
                grep { !m/\.md5$/ }
                map { s/^\Q$Conf{ArchiveDest}\E//; $_ }
                glob "$Conf{ArchiveDest}/$host.$num.*"
                ;
        print " [parts: ",join(", ", @tar_parts),"]" if $debug;

        my $same = 1;
        my @tar_files;

        print " reading" if $debug;

        # backup_parts numbers parts from 1
        my $part_nr = 1;

        foreach my $tarfilename (@tar_parts) {

                print "\n\t- $tarfilename";

                my $path = "$Conf{ArchiveDest}/$tarfilename";

                # was `|| die`, which also died on legitimate zero-byte files
                my $size = (stat( $path ))[7];
                die "can't stat $path: $!" unless defined $size;

                if ($size > $Conf{MaxArchiveSize}) {
                        print ", part bigger than media $size > $Conf{MaxArchiveSize}\n";
                        return 0;
                }

                print ", $size bytes";

=for later

                open(my $fh, "gzip -cd $path |") or die "can't open $path: $!";
                binmode($fh);
                my $tar = Archive::Tar::Streamed->new($fh);

                my $tar_size_inarc = 0;
                my $items = 0;

                while(my $entry = $tar->next) {
                        push @tar_files, $entry->name;
                        $items++;
                        $tar_size_inarc += $entry->size;

                        if ($tar_size_inarc > $Conf{MaxArchiveFileSize}) {
                                print ", part $tarfilename is too big $tar_size_inarc > $Conf{MaxArchiveFileSize}\n";
                                return 0;
                        }

                }

                close($fh);

                print ", $items items";

                if ($tar_size_inarc == 0 && $items == 0) {
                        print ", EMPTY tar\n";

                        my $backup_id = get_backup_id($host, $share, $num);
                        backup_inc_deleted( $backup_id );

                        $dbh->commit;

                        return 1;
                }

=cut

                # FIXME: uncompressed size for gzip parts, raw size otherwise
                # (dot was previously unescaped, matching e.g. "foogz")
                my $tar_size = $path =~ m/\.gz$/ ? get_gzip_size( $path ) : -s $path;

                #
                # finally, check if backup_parts table in database is valid
                #

                my $md5 = $md5sum->{$path} || die "no md5sum for $path";
                # FIXME: real item count is only known when the tar is scanned
                # (see the =for later section above)
                my $items = 1;

                check_part($host, undef, $num, $part_nr, $tar_size, $size, $md5, $items);

                $part_nr++;
        }

        # short-cut and exit;
        return $same unless($same);

        @tar_files = sort @tar_files;
        print "\n\t",($#tar_files + 1), " tar files";

        my $sth = $dbh->prepare(qq{
                SELECT path,type
                FROM files
                JOIN shares on shares.id = shareid
                JOIN hosts on hosts.id = shares.hostid
                WHERE hosts.name = ? and backupnum = ?
        });
        $sth->execute($host, $num);
        my @db_files;
        while( my $row = $sth->fetchrow_hashref ) {

                my $path = $row->{'path'} || die "no path?";
                $path =~ s#^/#./#;
                $path .= '/' if ($row->{'type'} == BPC_FTYPE_DIR);
                push @db_files, $path;
        }

        print " ",($#db_files + 1), " database files, diff";

        @db_files = sort @db_files;

        if ($#tar_files != $#db_files) {
                $same = 0;
                print " NUMBER";
        } elsif ( @tar_files ) {
                # Algorithm::Diff's `use` is commented out at the top of the
                # file, so load it lazily and skip the content diff when it is
                # unavailable instead of dying on an undefined class
                if ( eval { require Algorithm::Diff; 1 } ) {
                        my $diff = Algorithm::Diff->new(\@tar_files, \@db_files);
                        while ( $diff->Next() ) {
                                next if $diff->Same();
                                $same = 0;
                                print "< $_\n" for $diff->Items(1);
                                print "> $_\n" for $diff->Items(2);
                        }
                } else {
                        warn "Algorithm::Diff not available, skipping content diff\n";
                }
        }

        print " ",($same ? 'ok' : 'DIFFERENT'),
                ", dur: ",fmt_time(time() - $t), "\n";

        return $same;
}
358
359
360 #----- main
361
# NOTE(review): this main loop is unreachable at run time — the dispatch
# foreach near the top of the file ends with an unconditional exit.
# Fixed up for reference only.
my $sth = $dbh->prepare( qq{
	
select
	backups.id as backup_id,
	hosts.name as host,
	shares.name as share,
	backups.num as num,
	backups.date,
	inc_size,
	parts,
	count(backup_parts.backup_id) as backup_parts
from backups
	join shares on backups.hostid = shares.hostid
		and shares.id = backups.shareid
	join hosts on shares.hostid = hosts.id
	full outer join backup_parts on backups.id = backup_parts.backup_id
where not inc_deleted and backups.size > 0
group by backups.id, hosts.name, shares.name, backups.num, backups.date, inc_size, parts, backup_parts.backup_id
order by backups.date

} );

$sth->execute();
my $num_backups = $sth->rows;
my $curr_backup = 1;

while (my $row = $sth->fetchrow_hashref) {

	my $tar_file = BackupPC::Search::getGzipName($row->{'host'}, $row->{'share'}, $row->{'num'});

	# this will return -1 if file doesn't exist
	my $size = BackupPC::Search::get_tgz_size_by_name($tar_file);

	print "# host: ".$row->{host}.", share: ".$row->{'share'}.", backup_num:".$row->{num}." size: $size backup.size: ", $row->{inc_size},"\n" if $debug;

	# skip backups whose archive already looks complete (verify when $check)
	if ( $row->{'inc_size'} != -1 && $size != -1 && $row->{'inc_size'} >= $size && $row->{parts} == $row->{backup_parts}) {
		if ($check) {
			# FIXME: was tar_check(), which is defined nowhere in this
			# file; check_archive(host, num) is the available verifier
			check_archive($row->{'host'}, $row->{'num'}) && next;
		} else {
			next;
		}
	}

	print curr_time, " creating $curr_backup/$num_backups ", $row->{host}, ":", $row->{share}, " #", $row->{num},
		" ", strftime('%Y-%m-%d', localtime($row->{date})), " -> $tar_file";

	my $t = time();

=for later
	# re-create archive?
	my $cmd = qq[ $tarIncCreate -h "$row->{host}" -s "$row->{share}" -n $row->{num} -f ];
	print STDERR "## $cmd\n" if ($debug);

	if (system($cmd) != 0) {
		print STDERR " FAILED, marking this backup deleted";
		backup_inc_deleted( $row->{backup_id} );
	}
=cut

	print ", dur: ",fmt_time(time() - $t), "\n";

	$dbh->commit;

	# increment after reporting so the first backup prints as 1/N, not 2/N
	$curr_backup++;

}

undef $sth;
$dbh->disconnect;