4 use lib "/usr/local/BackupPC/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
16 use Data::Dump qw(dump);
18 use constant BPC_FTYPE_DIR => 5;
19 use constant EST_CHUNK => 4096;
21 # one-hour daylight-saving-time offset, in seconds
22 my $dst_offset = 60 * 60;
# --- single-instance guard (fragmentary excerpt; original lines 29-42) ---
# Derive a per-install pidfile name from this script's absolute path,
# with all non-word characters collapsed to '_' so it is a safe filename.
29 my $pid_path = abs_path($0);
30 $pid_path =~ s/\W+/_/g;
# NOTE(review): indirect object syntax -- prefer File::Pid->new(...).
32 my $pidfile = new File::Pid({
33 file => "/tmp/$pid_path",
# Abort if another live instance holds the pidfile; if the recorded pid
# is stale (not ours), fall back to File::Pid's default pidfile.
# NOTE(review): no visible cleanup/removal of the pidfile on exit -- confirm
# it happens in lines missing from this excerpt.
36 if (my $pid = $pidfile->running ) {
37 die "$0 already running: $pid\n";
38 } elsif ($pidfile->pid ne $$) {
40 $pidfile = new File::Pid;
42 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# --- configuration and database connection (fragment) ---
# strftime pattern used for all human-readable timestamps below.
45 my $t_fmt = '%Y-%m-%d %H:%M:%S';
48 my $bpc = BackupPC::Lib->new || die;
49 my %Conf = $bpc->Conf();
50 my $TopDir = $bpc->TopDir();
# Search database DSN is mandatory; user defaults to empty string.
53 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
54 my $user = $Conf{SearchUser} || '';
# Optional Hyper Estraier index node URL (may be undef / disabled later).
56 my $index_node_url = $Conf{HyperEstraierIndex};
# AutoCommit is off: all inserts below require explicit commits -- presumably
# done on lines missing from this excerpt. RaiseError makes DBI die on errors.
58 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
62 if ( !getopts("cdm:v:ijfq", \%opt ) ) {
64 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
67 -c create database on first use
68 -d delete database before import
69 -m num import just num increments for one host
70 -v num set verbosity (debug) level (default $debug)
71 -i update Hyper Estraier full text index
72 -j update full text, don't check existing files
73 -f don't do anything with full text index
74 -q be quiet for hosts without changes
76 Option -j is variation on -i. It will allow faster initial creation
77 of full-text index from existing database.
79 Option -f will create database which is out of sync with full text index. You
80 will have to re-run $0 with -i to fix it.
87 print "Debug level at $opt{v}\n";
90 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
91 $index_node_url = undef;
# --- fragment of fmt_time($seconds) -> "HHh MM:SS"-style duration string ---
# (sub header and closing lines are missing from this excerpt)
97 my $t = shift || return;
# gmtime on a raw seconds *difference* yields h/m/s of the duration;
# only valid for durations under 24h -- TODO confirm callers never exceed that.
99 my ($ss,$mm,$hh) = gmtime($t);
100 $out .= "${hh}h" if ($hh);
101 $out .= sprintf("%02d:%02d", $mm,$ss);
# --- fragment of curr_time(): current wall-clock time in $t_fmt format ---
106 return strftime($t_fmt,localtime());
# --- fragment of hest_update($host_id, $share_id, $num) ---
# Pushes newly-imported file rows into the Hyper Estraier full-text index,
# paging through the database EST_CHUNK rows at a time. Name grounded by the
# call site `hest_update($hostID, $shareID, $backupNum)` later in this file.
# Several surrounding declarations ($offset, $results, @data, $hest_node,
# the `do {` loop opener) are on lines missing from this excerpt.
113 my ($host_id, $share_id, $num) = @_;
# NOTE(review): relies on print() returning true to set $skip_check -- fragile
# idiom; also "initital" is a typo in the runtime message (cannot be changed
# in a comments-only pass).
115 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
# Bail out early when full-text indexing is disabled or URL is not http://.
117 unless ($index_node_url && $index_node_url =~ m#^http://#) {
118 print STDERR "HyperEstraier support not enabled or index node invalid\n" if ($debug);
123 print curr_time," updating Hyper Estraier:";
# Lazily create the shared node connection ( ||= caches across calls).
130 if ($index_node_url) {
131 print " opening index $index_node_url";
132 $hest_node ||= Search::Estraier::Node->new(
133 url => $index_node_url,
138 print " via node URL";
# When a specific (host, share, backup) triple is given, restrict the query.
147 if (defined($host_id) && defined($share_id) && defined($num)) {
154 @data = ( $host_id, $share_id, $num );
# Chunked paging: EST_CHUNK rows per iteration, advancing OFFSET below.
157 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
159 my $sth = $dbh->prepare(qq{
163 shares.name AS sname,
164 -- shares.share AS sharename,
165 files.backupnum AS backupnum,
166 -- files.name AS filename,
167 files.path AS filepath,
171 files.shareid AS shareid,
172 backups.date AS backup_date
174 INNER JOIN shares ON files.shareID=shares.ID
175 INNER JOIN hosts ON hosts.ID = shares.hostID
176 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
181 $sth->execute(@data);
182 $results = $sth->rows;
185 print " - no new files\n";
# Local fmt_date helper fragment: epoch -> BackupPC ISO timestamp.
192 my $t = shift || return;
193 my $iso = BackupPC::Lib::timeStamp($t);
198 while (my $row = $sth->fetchrow_hashref()) {
# Document URI: host:share#backupnum path -- must be unique per file version.
200 my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
201 if (! $skip_check && $hest_node) {
202 my $id = $hest_node->uri_to_id($uri);
# NOTE(review): Search::Estraier::Node::uri_to_id returns -1 when the URI is
# NOT in the index, so this skips *unindexed* documents -- looks inverted
# (would re-put already-indexed docs and skip new ones). Verify intent.
203 next if ($id && $id == -1);
206 # create a document object
207 my $doc = Search::Estraier::Document->new;
209 # add attributes to the document object
210 $doc->add_attr('@uri', $uri);
# Copy every selected column (by statement-handle column name) as an attribute.
212 foreach my $c (@{ $sth->{NAME} }) {
213 print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
214 $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
217 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
219 # add the body text to the document object
220 my $path = $row->{'filepath'};
221 $doc->add_text($path);
# Space out every character so partial-path substrings are searchable too.
222 $path =~ s/(.)/$1 /g;
223 $doc->add_hidden_text($path);
225 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
227 # register the document object to the database
228 $hest_node->put_doc($doc) if ($hest_node);
# Advance to next page; loop continues while a full chunk was returned.
235 $offset += EST_CHUNK;
237 } while ($results == EST_CHUNK);
# `|| 1` guards against division by zero in the rate printf below.
239 my $dur = (time() - $t) || 1;
240 printf(" [%.2f/s dur: %s]\n",
250 if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
252 print "force update of Hyper Estraier index ";
253 print "by -i flag" if ($opt{i});
254 print "by -j flag" if ($opt{j});
# --- fragment of an index-creation helper ---
# Spec format is "table:column[:unique]", e.g. "backup_parts:backup_id,part_nr:unique".
262 my $index = shift || return;
263 my ($table,$col,$unique) = split(/:/, $index);
# NOTE(review): as shown, $index still contains ':' characters, which is not a
# valid SQL identifier -- the missing lines (264-265) presumably sanitize
# $index and default $unique to ''. Confirm before relying on this fragment.
266 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
# Interpolated DDL is acceptable here: values come from a hard-coded list,
# not from untrusted input.
267 $dbh->do(qq{ create $unique index $index on $table($col) });
270 print "creating tables...\n";
274 ID SERIAL PRIMARY KEY,
275 name VARCHAR(30) NOT NULL,
279 create table shares (
280 ID SERIAL PRIMARY KEY,
281 hostID INTEGER NOT NULL references hosts(id),
282 name VARCHAR(30) NOT NULL,
283 share VARCHAR(200) NOT NULL
287 ID SERIAL PRIMARY KEY,
288 num INTEGER NOT NULL,
289 name VARCHAR(255) NOT NULL,
293 create table backups (
295 hostID INTEGER NOT NULL references hosts(id),
296 num INTEGER NOT NULL,
297 date integer NOT NULL,
298 type CHAR(4) not null,
299 shareID integer not null references shares(id),
300 size bigint not null,
301 inc_size bigint not null default -1,
302 inc_deleted boolean default false,
303 parts integer not null default 0,
309 shareID INTEGER NOT NULL references shares(id),
310 backupNum INTEGER NOT NULL,
311 name VARCHAR(255) NOT NULL,
312 path VARCHAR(255) NOT NULL,
313 date integer NOT NULL,
314 type INTEGER NOT NULL,
315 size bigint NOT NULL,
319 create table archive (
322 total_size bigint default -1,
324 username varchar(20) not null,
325 date timestamp default now(),
329 create table archive_backup (
330 archive_id int not null references archive(id) on delete cascade,
331 backup_id int not null references backups(id),
332 primary key(archive_id, backup_id)
335 create table archive_burned (
336 archive_id int references archive(id),
337 date timestamp default now(),
338 part int not null default 1,
339 copy int not null default 1,
340 iso_size bigint default -1
343 create table backup_parts (
345 backup_id int references backups(id),
346 part_nr int not null check (part_nr > 0),
347 tar_size bigint not null check (tar_size > 0),
348 size bigint not null check (size > 0),
350 items int not null check (items > 0),
351 date timestamp default now(),
355 -- report backups and corresponding dvd
357 create view backups_on_dvds as
360 hosts.name || ':' || shares.name as share,
362 backups.type as type,
363 abstime(backups.date) as backup_date,
364 backups.size as size,
365 backups.inc_size as gzip_size,
366 archive.id as archive_id,
369 join shares on backups.shareid=shares.id
370 join hosts on shares.hostid = hosts.id
371 left outer join archive_backup on backups.id = archive_backup.backup_id
372 left outer join archive on archive_backup.archive_id = archive.id
373 where backups.parts > 0 and size > 0
374 order by backups.date
378 print "creating indexes: ";
380 foreach my $index (qw(
393 archive_burned:archive_id
394 backup_parts:backup_id,part_nr:unique
399 print " creating sequence: ";
400 foreach my $seq (qw/dvd_nr/) {
402 $dbh->do( qq{ CREATE SEQUENCE $seq } );
405 print " creating triggers ";
406 $dbh->do( <<__END_OF_TRIGGER__ );
408 create or replace function backup_parts_check() returns trigger as '
414 -- raise notice ''old/new parts %/% backup_id %/%'', old.parts, new.parts, old.id, new.id;
415 if (TG_OP=''UPDATE'') then
417 b_parts := new.parts;
418 elsif (TG_OP = ''INSERT'') then
420 b_parts := new.parts;
422 b_counted := (select count(*) from backup_parts where backup_id = b_id);
423 -- raise notice ''backup % parts %'', b_id, b_parts;
424 if ( b_parts != b_counted ) then
425 raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
431 create trigger do_backup_parts_check
432 after insert or update or delete on backups
433 for each row execute procedure backup_parts_check();
435 create or replace function backup_backup_parts_check() returns trigger as '
441 if (TG_OP = ''INSERT'') then
442 -- raise notice ''trigger: % backup_id %'', TG_OP, new.backup_id;
443 b_id = new.backup_id;
444 my_part_nr = new.part_nr;
445 execute ''update backups set parts = parts + 1 where id = '' || b_id;
446 elsif (TG_OP = ''DELETE'') then
447 -- raise notice ''trigger: % backup_id %'', TG_OP, old.backup_id;
448 b_id = old.backup_id;
449 my_part_nr = old.part_nr;
450 execute ''update backups set parts = parts - 1 where id = '' || b_id;
452 calc_part := (select count(part_nr) from backup_parts where backup_id = b_id);
453 if ( my_part_nr != calc_part ) then
454 raise exception ''Update of backup_parts with backup_id % aborted, requested part_nr is % and calulated next is %'', b_id, my_part_nr, calc_part;
460 create trigger do_backup_backup_parts_check
461 after insert or update or delete on backup_parts
462 for each row execute procedure backup_backup_parts_check();
472 ## delete data before inserting ##
475 foreach my $table (qw(files dvds backups shares hosts)) {
477 $dbh->do(qq{ DELETE FROM $table });
484 ## insert new values ##
487 $hosts = $bpc->HostInfoRead();
493 $sth->{insert_hosts} = $dbh->prepare(qq{
494 INSERT INTO hosts (name, IP) VALUES (?,?)
497 $sth->{hosts_by_name} = $dbh->prepare(qq{
498 SELECT ID FROM hosts WHERE name=?
501 $sth->{backups_count} = $dbh->prepare(qq{
504 WHERE hostID=? AND num=? AND shareid=?
507 $sth->{insert_backups} = $dbh->prepare(qq{
508 INSERT INTO backups (hostID, num, date, type, shareid, size)
509 VALUES (?,?,?,?,?,-1)
512 $sth->{update_backups_size} = $dbh->prepare(qq{
513 UPDATE backups SET size = ?
514 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
517 $sth->{insert_files} = $dbh->prepare(qq{
519 (shareID, backupNum, name, path, date, type, size)
520 VALUES (?,?,?,?,?,?,?)
# --- main import loop (fragment): walk every BackupPC host, every backup,
# every share, and insert new backups/files into the search database.
# $hostID, $shareID, $inc_nr, $t and the commit calls live on lines missing
# from this excerpt.
523 my @hosts = keys %{$hosts};
526 foreach my $host_key (@hosts) {
528 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# Look up the host row; insert it (name, ip) if it does not exist yet.
530 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
532 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
533 $sth->{insert_hosts}->execute(
534 $hosts->{$host_key}->{'host'},
535 $hosts->{$host_key}->{'ip'}
538 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
542 # get backups for a host
543 my @backups = $bpc->BackupInfoRead($hostname);
# NOTE(review): leftover debug output -- dumps every host's full backup list
# to STDERR on each run; should be removed or put behind $debug.
544 warn "XXXX ",dump(@backups);
545 my $incs = scalar @backups;
# Host header is printed lazily (see $host_header = undef below) so -q only
# shows hosts that actually have changes.
547 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
548 $hosts->{$host_key}->{'host'},
553 print $host_header unless ($opt{q});
558 foreach my $backup (@backups) {
# -m limits how many increments are imported per host.
561 last if ($opt{m} && $inc_nr > $opt{m});
563 my $backupNum = $backup->{'num'};
564 my @backupShares = ();
566 my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
567 $hosts->{$host_key}->{'host'},
568 $inc_nr, $incs, $backupNum,
569 $backup->{type} || '?',
570 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
571 strftime($t_fmt,localtime($backup->{startTime})),
572 fmt_time($backup->{endTime} - $backup->{startTime})
574 print $share_header unless ($opt{q});
576 my $files = BackupPC::View->new($bpc, $hostname, \@backups, { only_first => 1 });
578 foreach my $share ($files->shareList($backupNum)) {
582 $shareID = getShareID($share, $hostID, $hostname);
# Idempotency check: count existing backup rows for this (host, num, share).
584 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
585 my ($count) = $sth->{backups_count}->fetchrow_array();
586 # skip if already in database!
587 next if ($count > 0);
589 # dump host and share header for -q
593 $host_header = undef;
599 print curr_time," ", $share;
# Insert the backup row first with size -1 (see insert_backups SQL above),
# then update the real size after the file walk.
601 $sth->{insert_backups}->execute(
604 $backup->{'endTime'},
605 substr($backup->{'type'},0,4),
# Walk the share's file tree, inserting new file rows; returns counts + size.
609 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
612 $sth->{update_backups_size}->execute(
616 $backup->{'endTime'},
617 substr($backup->{'type'},0,4),
# `|| 1` avoids division by zero in the rate calculation.
628 my $dur = (time() - $t) || 1;
629 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
631 ($size / 1024 / 1024),
# Only touch the full-text index when something new was actually inserted.
636 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
645 print "total duration: ",fmt_time(time() - $start_t),"\n";
# --- fragment of getShareID($share, $hostID, $hostname) ---
# Returns the shares.ID for (hostID, share), inserting a new row on first
# sight. Prepared statements are cached in the global %$sth (||=) so they are
# only compiled once per run.
651 my ($share, $hostID, $hostname) = @_;
653 $sth->{share_id} ||= $dbh->prepare(qq{
654 SELECT ID FROM shares WHERE hostID=? AND name=?
657 $sth->{share_id}->execute($hostID,$share);
659 my ($id) = $sth->{share_id}->fetchrow_array();
# Fast path: share already known.
661 return $id if (defined($id));
# Insert SQL body is on lines missing from this excerpt (664-668).
663 $sth->{insert_share} ||= $dbh->prepare(qq{
# Display name "hostname/share" with duplicate slashes collapsed.
669 my $drop_down = $hostname . '/' . $share;
670 $drop_down =~ s#//+#/#g;
672 $sth->{insert_share}->execute($hostID,$share, $drop_down);
673 return $dbh->last_insert_id(undef,undef,'shares',undef);
# --- fragment of found_in_db($key, @data) ---
# Checks whether a file row already exists (matching date exactly or shifted
# by +/- one hour to tolerate DST changes) and, as a side effect, INSERTS the
# file when it is not found. Memoized via the global $beenThere cache.
# The SELECT column list, @data construction, and the return of $rows/$found
# are on lines missing from this excerpt.
681 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
683 return $beenThere->{$key} if (defined($beenThere->{$key}));
685 $sth->{file_in_db} ||= $dbh->prepare(qq{
687 WHERE shareID = ? and
# Three date placeholders: exact, one hour earlier, one hour later.
690 ( date = ? or date = ? or date = ? )
694 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
695 $sth->{file_in_db}->execute(@param);
696 my $rows = $sth->{file_in_db}->rows;
697 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
699 $beenThere->{$key}++;
# NOTE(review): inserts via the shared insert_files statement when no match --
# @data here presumably mirrors the sub's arguments; confirm in full source.
701 $sth->{'insert_files'}->execute(@data) unless ($rows);
705 ####################################################
706 # recursing through the filesystem structure    #
707 # and returning a flattened list of files       #
708 ####################################################
# NOTE(review): the ($$$$$$$$) prototype does not validate arguments (and has
# 8 slots for the 7 unpacked below) -- Perl prototypes only alter parsing.
709 sub recurseDir($$$$$$$$) {
711 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
713 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# Counters returned to the caller: total/new files, total/new dirs, bytes.
715 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
720 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
721 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
723 # first, add all the entries in current directory
724 foreach my $path_key (keys %{$filesInBackup}) {
725 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
# @data construction (opening lines missing here) collects the attributes
# passed on to found_in_db.
730 $filesInBackup->{$path_key}->{'relPath'},
731 $filesInBackup->{$path_key}->{'mtime'},
732 $filesInBackup->{$path_key}->{'type'},
733 $filesInBackup->{$path_key}->{'size'}
# Three dedup keys per file: exact mtime and mtime +/- $dst_offset, so a file
# whose timestamp shifted by a DST change is not treated as new.
736 my $key = join(" ", (
740 $filesInBackup->{$path_key}->{'mtime'},
741 $filesInBackup->{$path_key}->{'size'}
744 my $key_dst_prev = join(" ", (
748 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
749 $filesInBackup->{$path_key}->{'size'}
752 my $key_dst_next = join(" ", (
756 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
757 $filesInBackup->{$path_key}->{'size'}
# Only count as new when none of the three keys was seen this run and the
# file is not already in the database.
762 ! defined($beenThere->{$key}) &&
763 ! defined($beenThere->{$key_dst_prev}) &&
764 ! defined($beenThere->{$key_dst_next}) &&
765 ! ($found = found_in_db($key, @data))
767 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
769 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
770 $new_dirs++ unless ($found);
771 print STDERR " dir\n" if ($debug >= 2);
773 $new_files++ unless ($found);
774 print STDERR " file\n" if ($debug >= 2);
776 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Subdirectories are pushed on a stack and recursed into after this directory
# is fully processed (see commented-out direct recursion below).
779 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
782 my $full_path = $dir . '/' . $path_key;
783 push @stack, $full_path;
784 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
786 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
798 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Deferred recursion: child counters are accumulated on lines missing from
# this excerpt (between 802 and 811).
800 while ( my $dir = shift @stack ) {
801 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
802 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
811 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);