1 #!/usr/local/bin/perl -w
# NOTE(review): this is a gapped listing — original line numbers are embedded in
# each line and several ranges (e.g. 2-3, 5-10) are missing from view; the
# missing lines presumably hold `use strict;` and the remaining module imports
# (DBI, Getopt::Std, File::Pid, Cwd/abs_path, BackupPC::Lib, ...) — confirm.
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
# File type constant: matches BackupPC's "directory" attrib type.
17 use constant BPC_FTYPE_DIR => 5;
# Batch size for paging rows into the Hyper Estraier full-text index.
18 use constant EST_CHUNK => 4096;
20 # daylight saving time change offset for 1h
21 my $dst_offset = 60 * 60;
# Single-instance guard: derive a pidfile name from this script's absolute
# path (non-word chars flattened to '_') and refuse to start if already running.
28 my $pid_path = abs_path($0);
29 $pid_path =~ s/\W+/_/g;
# NOTE(review): indirect-object syntax (`new File::Pid`) — File::Pid->new(...)
# is the safer modern spelling; also the closing `})` of this call is in a
# missing line.
31 my $pidfile = new File::Pid({
32 file => "/tmp/$pid_path",
35 if (my $pid = $pidfile->running ) {
36 die "$0 already running: $pid\n";
37 } elsif ($pidfile->pid ne $$) {
39 $pidfile = new File::Pid;
41 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# strftime format used for timestamps printed to the console and stored logs.
44 my $t_fmt = '%Y-%m-%d %H:%M:%S';
# BackupPC configuration: gives us the search DSN, DB user and index URL.
47 my $bpc = BackupPC::Lib->new || die;
48 my %Conf = $bpc->Conf();
49 my $TopDir = $bpc->TopDir();
52 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
53 my $user = $Conf{SearchUser} || '';
# Hyper Estraier node URL; undef disables all full-text index updates below.
55 my $index_node_url = $Conf{HyperEstraierIndex};
# AutoCommit off: commits are presumably issued in batches elsewhere in the
# script (not visible in this excerpt) — TODO confirm commit points.
57 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Command-line parsing; on bad options the usage text below is emitted
# (the surrounding die/print and its string delimiters are in missing lines).
61 if ( !getopts("cdm:v:ijfq", \%opt ) ) {
63 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
66 -c create database on first use
67 -d delete database before import
68 -m num import just num increments for one host
69 -v num set verbosity (debug) level (default $debug)
70 -i update Hyper Estraier full text index
71 -j update full text, don't check existing files
72 -f don't do anything with full text index
73 -q be quiet for hosts without changes
75 Option -j is variation on -i. It will allow faster initial creation
76 of full-text index from existing database.
78 Option -f will create database which is out of sync with full text index. You
79 will have to re-run $0 with -i to fix it.
# Presumably guarded by `if ($opt{v})` in a missing line — confirm.
86 print "Debug level at $opt{v}\n";
# -f handling: drop the index URL so hest_update() becomes a no-op; the user
# is warned that -j must be run later to resynchronize the full-text index.
89 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
90 $index_node_url = undef;
# Fragment of fmt_time($seconds): formats a duration as "[Nh]MM:SS".
# NOTE(review): the enclosing `sub fmt_time` line and `my $out` declaration
# are in missing lines. gmtime() is used purely to split seconds into h/m/s,
# so durations >= 24h would wrap — acceptable for backup run times.
96 my $t = shift || return;
98 my ($ss,$mm,$hh) = gmtime($t);
99 $out .= "${hh}h" if ($hh);
100 $out .= sprintf("%02d:%02d", $mm,$ss);
# Fragment of curr_time(): current wall-clock time in the shared $t_fmt format.
105 return strftime($t_fmt,localtime());
# Fragment of hest_update($host_id, $share_id, $num): pushes file records from
# the SQL database into the Hyper Estraier full-text index, EST_CHUNK rows at
# a time. All three args undef => full reindex; all defined => single backup.
112 my ($host_id, $share_id, $num) = @_;
# -j flag: skip per-URI existence checks (the `&&` also prints the notice as
# a side effect — print returns true, so $skip_check ends up true under -j).
114 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
# No index configured (or -f given): full-text update is a no-op.
116 unless (defined($index_node_url)) {
117 print STDERR "HyperEstraier support not enabled in configuration\n";
122 print curr_time," updating Hyper Estraier:";
# Lazily open one node connection and reuse it across calls ($hest_node is
# presumably a file-level lexical declared in a missing line — confirm).
129 if ($index_node_url) {
130 print " opening index $index_node_url";
131 $hest_node ||= Search::Estraier::Node->new(
132 url => $index_node_url,
137 print " via node URL";
# Restrict the query to one (host, share, backup) when all three were passed;
# the matching WHERE clause lives in missing lines of the SQL below.
146 if (defined($host_id) && defined($share_id) && defined($num)) {
153 @data = ( $host_id, $share_id, $num );
# Paging clause; $offset is advanced by EST_CHUNK at the bottom of the loop.
156 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
# SQL string interior below (lines 159-179 partially missing) — joins files
# with shares/hosts/backups so each row carries everything the index needs.
158 my $sth = $dbh->prepare(qq{
162 shares.name AS sname,
163 -- shares.share AS sharename,
164 files.backupnum AS backupnum,
165 -- files.name AS filename,
166 files.path AS filepath,
170 files.shareid AS shareid,
171 backups.date AS backup_date
173 INNER JOIN shares ON files.shareID=shares.ID
174 INNER JOIN hosts ON hosts.ID = shares.hostID
175 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
180 $sth->execute(@data);
181 $results = $sth->rows;
184 print " - no new files\n";
# Fragment of a nested fmt_date helper (see the commented-out @cdate use
# below) — converts an epoch to BackupPC's timestamp form. Partly missing.
191 my $t = shift || return;
192 my $iso = BackupPC::Lib::timeStamp($t);
197 while (my $row = $sth->fetchrow_hashref()) {
# Document URI: host:share#backupnum path — unique per file per backup.
199 my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
# Existence check, skipped under -j.
200 unless ($skip_check && $hest_node) {
201 my $id = $hest_node->uri_to_id($uri);
# NOTE(review): Search::Estraier's uri_to_id returns -1 when the URI is NOT
# in the index, so `next` on -1 appears to skip *new* documents and re-put
# already-indexed ones — the condition looks inverted; confirm intent.
202 next if ($id && $id == -1);
205 # create a document object
206 my $doc = Search::Estraier::Document->new;
208 # add attributes to the document object
209 $doc->add_attr('@uri', $uri);
# Copy every selected SQL column (statement-handle NAME list) as a document
# attribute, skipping NULLs.
211 foreach my $c (@{ $sth->{NAME} }) {
212 print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
213 $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
216 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
218 # add the body text to the document object
219 my $path = $row->{'filepath'};
220 $doc->add_text($path);
# Space out every character so substring searches inside path components
# still match (hidden text is searchable but not displayed).
221 $path =~ s/(.)/$1 /g;
222 $doc->add_hidden_text($path);
224 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
226 # register the document object to the database
228 $hest_node->put_doc($doc);
237 $offset += EST_CHUNK;
# A short (non-full) chunk means the result set is exhausted.
239 } while ($results == EST_CHUNK);
# Guard against division by zero for sub-second runs in the rate report.
241 my $dur = (time() - $t) || 1;
242 printf(" [%.2f/s dur: %s]\n",
# -i/-j without -c: force a full-text index refresh over the existing DB.
252 if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
254 print "force update of Hyper Estraier index ";
255 print "by -i flag" if ($opt{i});
256 print "by -j flag" if ($opt{j});
# Fragment of an index-creation helper: specs look like "table:column" or
# "table:column:unique" (see the qw() list further down).
264 my $index = shift || return;
265 my ($table,$col,$unique) = split(/:/, $index);
# NOTE(review): missing lines 266-267 presumably default $unique to '' and
# sanitize $index into a legal identifier (it still contains ':' here);
# otherwise interpolating undef $unique would warn under -w — confirm.
268 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
269 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema bootstrap (-c): tables, indexes, a sequence and an integrity trigger.
# NOTE(review): most lines below are the *interiors* of $dbh->do(qq{...}) SQL
# strings whose do()/qq{ delimiters fall in missing lines; no comments are
# inserted inside them. Tables: hosts, shares, dvds(?), backups, files,
# archive, archive_backup, archive_burned, backup_parts.
272 print "creating tables...\n";
276 ID SERIAL PRIMARY KEY,
277 name VARCHAR(30) NOT NULL,
281 create table shares (
282 ID SERIAL PRIMARY KEY,
283 hostID INTEGER NOT NULL references hosts(id),
284 name VARCHAR(30) NOT NULL,
285 share VARCHAR(200) NOT NULL
289 ID SERIAL PRIMARY KEY,
290 num INTEGER NOT NULL,
291 name VARCHAR(255) NOT NULL,
295 create table backups (
297 hostID INTEGER NOT NULL references hosts(id),
298 num INTEGER NOT NULL,
299 date integer NOT NULL,
300 type CHAR(4) not null,
301 shareID integer not null references shares(id),
302 size bigint not null,
303 inc_size bigint not null default -1,
304 inc_deleted boolean default false,
305 parts integer not null default 0,
311 shareID INTEGER NOT NULL references shares(id),
312 backupNum INTEGER NOT NULL,
313 name VARCHAR(255) NOT NULL,
314 path VARCHAR(255) NOT NULL,
315 date integer NOT NULL,
316 type INTEGER NOT NULL,
317 size bigint NOT NULL,
321 create table archive (
324 total_size bigint default -1,
326 username varchar(20) not null,
327 date timestamp default now(),
331 create table archive_backup (
332 archive_id int not null references archive(id) on delete cascade,
333 backup_id int not null references backups(id),
334 primary key(archive_id, backup_id)
337 create table archive_burned (
338 archive_id int references archive(id),
339 date timestamp default now(),
340 part int not null default 1,
341 copy int not null default 1,
342 iso_size bigint default -1
345 create table backup_parts (
347 backup_id int references backups(id),
348 part_nr int not null check (part_nr > 0),
349 tar_size bigint not null check (tar_size > 0),
350 size bigint not null check (size > 0),
352 items int not null check (items > 0),
353 date timestamp default now(),
# Indexes are created from "table:column[:unique]" specs via the helper above;
# the qw() list is partly missing (no comments may go inside qw()).
358 print "creating indexes: ";
360 foreach my $index (qw(
373 archive_burned:archive_id
374 backup_parts:backup_id,part_nr
# Sequence used for DVD numbering by the archive workflow.
379 print " creating sequence: ";
380 foreach my $seq (qw/dvd_nr/) {
382 $dbh->do( qq{ CREATE SEQUENCE $seq } );
# PL/pgSQL trigger: rejects backups whose `parts` count disagrees with the
# actual row count in backup_parts. Doubled '' quotes because the function
# body is itself inside a Perl qq{} string. Partly missing.
385 print " creating triggers ";
387 create or replace function backup_parts_check() returns trigger as '
393 if (TG_OP=''UPDATE'') then
395 b_parts := old.parts;
396 elsif (TG_OP = ''INSERT'') then
398 b_parts := new.parts;
400 b_counted := (select count(*) from backup_parts where backup_id = b_id);
401 if ( b_parts != b_counted ) then
402 raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
408 create trigger do_backup_parts_check
409 after insert or update or delete on backups
410 for each row execute procedure backup_parts_check();
419 ## delete data before inserting ##
# -d flag: purge in FK-dependency order (children before parents).
422 foreach my $table (qw(files dvds backups shares hosts)) {
424 $dbh->do(qq{ DELETE FROM $table });
431 ## insert new values ##
# Host list straight from BackupPC's config/hosts file.
434 $hosts = $bpc->HostInfoRead();
# Prepared-statement cache, keyed by purpose, reused throughout the import
# loop below ($sth is presumably a file-level hashref — declaration missing).
440 $sth->{insert_hosts} = $dbh->prepare(qq{
441 INSERT INTO hosts (name, IP) VALUES (?,?)
444 $sth->{hosts_by_name} = $dbh->prepare(qq{
445 SELECT ID FROM hosts WHERE name=?
448 $sth->{backups_count} = $dbh->prepare(qq{
451 WHERE hostID=? AND num=? AND shareid=?
# size starts at -1 ("unknown") and is filled in by update_backups_size after
# the file recursion has summed actual sizes.
454 $sth->{insert_backups} = $dbh->prepare(qq{
455 INSERT INTO backups (hostID, num, date, type, shareid, size)
456 VALUES (?,?,?,?,?,-1)
459 $sth->{update_backups_size} = $dbh->prepare(qq{
460 UPDATE backups SET size = ?
461 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
464 $sth->{insert_files} = $dbh->prepare(qq{
466 (shareID, backupNum, name, path, date, type, size)
467 VALUES (?,?,?,?,?,?,?)
473 foreach my $host_key (@hosts) {
475 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
477 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
479 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
480 $sth->{insert_hosts}->execute(
481 $hosts->{$host_key}->{'host'},
482 $hosts->{$host_key}->{'ip'}
485 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
489 # get backups for a host
490 my @backups = $bpc->BackupInfoRead($hostname);
491 my $incs = scalar @backups;
493 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
494 $hosts->{$host_key}->{'host'},
499 print $host_header unless ($opt{q});
504 foreach my $backup (@backups) {
507 last if ($opt{m} && $inc_nr > $opt{m});
509 my $backupNum = $backup->{'num'};
510 my @backupShares = ();
512 my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
513 $hosts->{$host_key}->{'host'},
514 $inc_nr, $incs, $backupNum,
515 $backup->{type} || '?',
516 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
517 strftime($t_fmt,localtime($backup->{startTime})),
518 fmt_time($backup->{endTime} - $backup->{startTime})
520 print $share_header unless ($opt{q});
522 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
523 foreach my $share ($files->shareList($backupNum)) {
527 $shareID = getShareID($share, $hostID, $hostname);
529 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
530 my ($count) = $sth->{backups_count}->fetchrow_array();
531 # skip if allready in database!
532 next if ($count > 0);
534 # dump host and share header for -q
538 $host_header = undef;
544 print curr_time," ", $share;
546 $sth->{insert_backups}->execute(
549 $backup->{'endTime'},
550 substr($backup->{'type'},0,4),
554 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
557 $sth->{update_backups_size}->execute(
561 $backup->{'endTime'},
562 substr($backup->{'type'},0,4),
573 my $dur = (time() - $t) || 1;
574 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
576 ($size / 1024 / 1024),
581 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
590 print "total duration: ",fmt_time(time() - $start_t),"\n";
# Fragment of getShareID($share, $hostID, $hostname): find-or-create the
# shares row for this host+share, returning its ID. `sub` line is missing.
596 my ($share, $hostID, $hostname) = @_;
598 $sth->{share_id} ||= $dbh->prepare(qq{
599 SELECT ID FROM shares WHERE hostID=? AND name=?
602 $sth->{share_id}->execute($hostID,$share);
604 my ($id) = $sth->{share_id}->fetchrow_array();
606 return $id if (defined($id));
# Not found: insert it (column list is in missing lines; $drop_down is
# presumably the `share` display column — confirm).
608 $sth->{insert_share} ||= $dbh->prepare(qq{
# Human-readable "host/share" label with runs of slashes collapsed.
614 my $drop_down = $hostname . '/' . $share;
615 $drop_down =~ s#//+#/#g;
617 $sth->{insert_share}->execute($hostID,$share, $drop_down);
618 return $dbh->last_insert_id(undef,undef,'shares',undef);
# Fragment of found_in_db($key, @data): returns true when an identical file
# row already exists (memoized in $beenThere); otherwise inserts it.
# The two `undef` slots skip @data fields unused by the duplicate check.
626 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
628 return $beenThere->{$key} if (defined($beenThere->{$key}));
# Duplicate test tolerates a +/-1h mtime shift caused by DST transitions —
# the same file backed up across a DST change must not be counted as new.
630 $sth->{file_in_db} ||= $dbh->prepare(qq{
632 WHERE shareID = ? and
635 ( date = ? or date = ? or date = ? )
639 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
640 $sth->{file_in_db}->execute(@param);
641 my $rows = $sth->{file_in_db}->rows;
642 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
644 $beenThere->{$key}++;
# Side effect: new files are inserted here, not by the caller.
646 $sth->{'insert_files'}->execute(@data) unless ($rows);
650 ####################################################
651 # recursing through filesystem structure and       #
652 # returning flattened files list                   #
653 ####################################################
# NOTE(review): prototype declares 8 scalars but only 7 args are unpacked and
# passed at call sites; Perl prototypes shouldn't be used for arg checking
# anyway — confirm whether the extra `$` is a leftover.
654 sub recurseDir($$$$$$$$) {
656 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
658 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# Returns (total files, new files, total dirs, new dirs, byte size) for $dir
# and everything beneath it.
660 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
665 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
666 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
668 # first, add all the entries in current directory
669 foreach my $path_key (keys %{$filesInBackup}) {
670 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
# @data = args for found_in_db / insert_files (leading fields missing here).
675 $filesInBackup->{$path_key}->{'relPath'},
676 $filesInBackup->{$path_key}->{'mtime'},
677 $filesInBackup->{$path_key}->{'type'},
678 $filesInBackup->{$path_key}->{'size'}
# Memo keys: exact mtime plus +/-1h DST-shifted variants, mirroring the
# DST tolerance inside found_in_db.
681 my $key = join(" ", (
685 $filesInBackup->{$path_key}->{'mtime'},
686 $filesInBackup->{$path_key}->{'size'}
689 my $key_dst_prev = join(" ", (
693 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
694 $filesInBackup->{$path_key}->{'size'}
697 my $key_dst_next = join(" ", (
701 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
702 $filesInBackup->{$path_key}->{'size'}
# Only hit the database when none of the three key variants was seen yet.
707 ! defined($beenThere->{$key}) &&
708 ! defined($beenThere->{$key_dst_prev}) &&
709 ! defined($beenThere->{$key_dst_next}) &&
710 ! ($found = found_in_db($key, @data))
712 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
714 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
715 $new_dirs++ unless ($found);
716 print STDERR " dir\n" if ($debug >= 2);
718 $new_files++ unless ($found);
719 print STDERR " file\n" if ($debug >= 2);
721 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Subdirectories are queued on @stack instead of recursing immediately,
# keeping the call depth at one level per directory tier.
724 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
727 my $full_path = $dir . '/' . $path_key;
728 push @stack, $full_path;
729 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
731 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
743 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Second pass: recurse into each queued subdirectory; the counters are
# presumably accumulated in missing lines between 747 and 756 — confirm.
745 while ( my $dir = shift @stack ) {
746 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
747 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
756 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);