# Module setup and single-instance guard for the BackupPC search-index updater.
# NOTE(review): this is a fragmentary listing — strict/warnings and several use
# lines (Cwd, File::Pid, DBI, BackupPC modules) are presumably on elided lines.
4 use lib "/usr/local/BackupPC/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
16 use Data::Dump qw(dump);
18 use constant BPC_FTYPE_DIR => 5;
19 use constant EST_CHUNK => 4096;
21 # daylight saving time change offset for 1h
22 my $dst_offset = 60 * 60;
# Derive a filesystem-safe token from the script path (every non-word run
# collapsed to '_'); presumably intended for a per-script pidfile name.
29 my $pid_path = abs_path($0);
30 $pid_path =~ s/\W+/_/g;
32 my $pidfile = new File::Pid({
33 file => "/tmp/search_update.pid",
# Refuse to start if another instance is already running under this pidfile.
36 if (my $pid = $pidfile->running ) {
37 die "$0 already running: $pid\n";
38 } elsif ($pidfile->pid ne $$) {
40 $pidfile = new File::Pid;
42 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# Timestamp format used by curr_time() and per-backup log lines.
45 my $t_fmt = '%Y-%m-%d %H:%M:%S';
# BackupPC configuration: gives us the search database DSN and credentials.
48 my $bpc = BackupPC::Lib->new || die;
49 my %Conf = $bpc->Conf();
50 my $TopDir = $bpc->TopDir();
53 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
54 my $user = $Conf{SearchUser} || '';
# Full-text (Hyper Estraier) index endpoint; undef disables fulltext updates.
56 my $index_node_url = $Conf{HyperEstraierIndex};
# AutoCommit off: imports are committed in batches elsewhere in the script.
58 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line switches; on failure the usage text below is emitted
# (the die/heredoc opener is on an elided line — do not insert text inside it).
62 if ( !getopts("cdm:v:ijfqh:", \%opt ) ) {
64 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
67 -c create database on first use
68 -d delete database before import
69 -m num import just num increments for one host
70 -h "h1 h2" import just single HostList hosts
71 -v num set verbosity (debug) level (default $debug)
72 -i update Hyper Estraier full text index
73 -j update full text, don't check existing files
74 -f don't do anything with full text index
75 -q be quiet for hosts without changes
77 Option -j is variation on -i. It will allow faster initial creation
78 of full-text index from existing database.
80 Option -f will create database which is out of sync with full text index. You
81 will have to re-run $0 with -i to fix it.
88 print "Debug level at $opt{v}\n";
91 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
92 $index_node_url = undef;
# fmt_time fragment: format a duration in seconds as "[Hh]MM:SS".
# ($out is declared on an elided line; returns early on false/zero input.)
98 my $t = shift || return;
100 my ($ss,$mm,$hh) = gmtime($t);
101 $out .= "${hh}h" if ($hh);
102 $out .= sprintf("%02d:%02d", $mm,$ss);
# curr_time fragment: current local time rendered with $t_fmt.
107 return strftime($t_fmt,localtime());
# hest_update fragment: push new file rows into the full-text search index,
# paging through the files table EST_CHUNK rows at a time.
# Args: optional ($host_id, $share_id, $num) restrict the update to one backup.
112 my ($host_id, $share_id, $num) = @_;
# -j mode: skip the per-document existence check (initial-import fast path).
114 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
116 print curr_time," updating fulltext:";
123 my $search = BackupPC::Search->search_module;
# When a specific backup is addressed, bind its ids as query parameters.
131 if (defined($host_id) && defined($share_id) && defined($num)) {
138 @data = ( $host_id, $share_id, $num );
# Page window for the chunked SELECT below.
141 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
143 my $sth = $dbh->prepare(qq{
147 shares.name AS sname,
148 -- shares.share AS sharename,
149 files.backupnum AS backupnum,
150 -- files.name AS filename,
151 files.path AS filepath,
155 files.shareid AS shareid,
156 backups.date AS backup_date
158 INNER JOIN shares ON files.shareID=shares.ID
159 INNER JOIN hosts ON hosts.ID = shares.hostID
160 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
165 $sth->execute(@data);
166 $results = $sth->rows;
169 print " - no new files\n";
# Inner helper fragment: convert an epoch to the search module's timestamp.
176 my $t = shift || return;
177 my $iso = BackupPC::Lib::timeStamp($t);
# Index each fetched row, skipping documents already present (unless -j).
182 while (my $row = $sth->fetchrow_hashref()) {
183 next if $search->exists( $row );
184 $search->add_doc( $row );
# Advance the page; loop continues while a full chunk was returned.
190 $offset += EST_CHUNK;
192 } while ($results == EST_CHUNK);
# Guard against division by zero when the batch finished within the clock tick.
196 my $dur = (time() - $t) || 1;
197 printf(" [%.2f/s dur: %s]\n",
# -i/-j without -c: force a full-text index refresh over the existing database.
207 if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
209 print "force update of Hyper Estraier index ";
210 print "by -i flag" if ($opt{i});
211 print "by -j flag" if ($opt{j});
# Index-creation helper fragment: spec format is "name:table:column[:unique]"
# (split on ':'); $unique, when present, is interpolated into the DDL.
219 my $index = shift || return;
220 my ($table,$col,$unique) = split(/:/, $index);
223 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
224 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema bootstrap (-c): create hosts/shares/backups/files plus the archive
# bookkeeping tables and the backups_on_dvds reporting view. The lines below
# are SQL string content inside $dbh->do(qq{...}) calls — no Perl comments are
# inserted among them to avoid altering the statements sent to the database.
227 print "creating tables...\n";
231 ID SERIAL PRIMARY KEY,
232 name VARCHAR(30) NOT NULL,
236 create table shares (
237 ID SERIAL PRIMARY KEY,
238 hostID INTEGER NOT NULL references hosts(id),
239 name VARCHAR(30) NOT NULL,
240 share VARCHAR(200) NOT NULL
244 ID SERIAL PRIMARY KEY,
245 num INTEGER NOT NULL,
246 name VARCHAR(255) NOT NULL,
250 create table backups (
252 hostID INTEGER NOT NULL references hosts(id),
253 num INTEGER NOT NULL,
254 date integer NOT NULL,
255 type CHAR(4) not null,
256 shareID integer not null references shares(id),
257 size bigint not null,
258 inc_size bigint not null default -1,
259 inc_deleted boolean default false,
260 parts integer not null default 0,
266 shareID INTEGER NOT NULL references shares(id),
267 backupNum INTEGER NOT NULL,
268 name VARCHAR(255) NOT NULL,
269 path VARCHAR(255) NOT NULL,
270 date integer NOT NULL,
271 type INTEGER NOT NULL,
272 size bigint NOT NULL,
276 create table archive (
279 total_size bigint default -1,
281 username varchar(20) not null,
282 date timestamp default now(),
286 create table archive_backup (
287 archive_id int not null references archive(id) on delete cascade,
288 backup_id int not null references backups(id),
289 primary key(archive_id, backup_id)
292 create table archive_burned (
293 archive_id int references archive(id),
294 date timestamp default now(),
295 part int not null default 1,
296 copy int not null default 1,
297 iso_size bigint default -1
300 create table backup_parts (
302 backup_id int references backups(id),
303 part_nr int not null check (part_nr > 0),
304 tar_size bigint not null check (tar_size > 0),
305 size bigint not null check (size > 0),
307 items int not null check (items > 0),
308 date timestamp default now(),
312 -- report backups and corresponding dvd
314 create view backups_on_dvds as
317 hosts.name || ':' || shares.name as share,
319 backups.type as type,
320 abstime(backups.date) as backup_date,
321 backups.size as size,
322 backups.inc_size as gzip_size,
323 archive.id as archive_id,
326 join shares on backups.shareid=shares.id
327 join hosts on shares.hostid = hosts.id
328 left outer join archive_backup on backups.id = archive_backup.backup_id
329 left outer join archive on archive_backup.archive_id = archive.id
330 where backups.parts > 0 and size > 0
331 order by backups.date
# Create secondary indexes from "table:column[:unique]" specs, the dvd_nr
# sequence, and two PL/pgSQL triggers that keep backups.parts consistent with
# the actual row count in backup_parts.
335 print "creating indexes: ";
337 foreach my $index (qw(
350 archive_burned:archive_id
351 backup_parts:backup_id,part_nr:unique
356 print " creating sequence: ";
357 foreach my $seq (qw/dvd_nr/) {
359 $dbh->do( qq{ CREATE SEQUENCE $seq } );
364 print " creating triggers ";
# Trigger bodies below are heredoc string content (doubled '' quotes are
# PL/pgSQL escaping); no Perl comments are inserted inside them.
365 $dbh->do( <<__END_OF_TRIGGER__ );
367 create or replace function backup_parts_check() returns trigger as '
373 -- raise notice ''old/new parts %/% backup_id %/%'', old.parts, new.parts, old.id, new.id;
374 if (TG_OP=''UPDATE'') then
376 b_parts := new.parts;
377 elsif (TG_OP = ''INSERT'') then
379 b_parts := new.parts;
381 b_counted := (select count(*) from backup_parts where backup_id = b_id);
382 -- raise notice ''backup % parts %'', b_id, b_parts;
383 if ( b_parts != b_counted ) then
384 raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
390 create trigger do_backup_parts_check
391 after insert or update or delete on backups
392 for each row execute procedure backup_parts_check();
394 create or replace function backup_backup_parts_check() returns trigger as '
400 if (TG_OP = ''INSERT'') then
401 -- raise notice ''trigger: % backup_id %'', TG_OP, new.backup_id;
402 b_id = new.backup_id;
403 my_part_nr = new.part_nr;
404 execute ''update backups set parts = parts + 1 where id = '' || b_id;
405 elsif (TG_OP = ''DELETE'') then
406 -- raise notice ''trigger: % backup_id %'', TG_OP, old.backup_id;
407 b_id = old.backup_id;
408 my_part_nr = old.part_nr;
409 execute ''update backups set parts = parts - 1 where id = '' || b_id;
411 calc_part := (select count(part_nr) from backup_parts where backup_id = b_id);
412 if ( my_part_nr != calc_part ) then
413 raise exception ''Update of backup_parts with backup_id % aborted, requested part_nr is % and calulated next is %'', b_id, my_part_nr, calc_part;
419 create trigger do_backup_backup_parts_check
420 after insert or update or delete on backup_parts
421 for each row execute procedure backup_backup_parts_check();
433 ## delete data before inserting ##
# -d mode: empty all data tables, child tables first to satisfy FK constraints.
436 foreach my $table (qw(files dvds backups shares hosts)) {
438 $dbh->do(qq{ DELETE FROM $table });
445 ## insert new values ##
# Host list from BackupPC's own configuration (hosts file).
448 $hosts = $bpc->HostInfoRead();
# Prepared statements reused throughout the import loop below; the qq{}
# closers sit on elided lines, so no comments are placed between them.
454 $sth->{insert_hosts} = $dbh->prepare(qq{
455 INSERT INTO hosts (name, IP) VALUES (?,?)
458 $sth->{hosts_by_name} = $dbh->prepare(qq{
459 SELECT ID FROM hosts WHERE name=?
462 $sth->{backups_count} = $dbh->prepare(qq{
465 WHERE hostID=? AND num=? AND shareid=?
468 $sth->{insert_backups} = $dbh->prepare(qq{
469 INSERT INTO backups (hostID, num, date, type, shareid, size)
470 VALUES (?,?,?,?,?,-1)
473 $sth->{update_backups_size} = $dbh->prepare(qq{
474 UPDATE backups SET size = ?
475 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
478 $sth->{insert_files} = $dbh->prepare(qq{
480 (shareID, backupNum, name, path, date, type, size)
481 VALUES (?,?,?,?,?,?,?)
# Main import loop: for every configured host, register it in the hosts table,
# then walk each backup increment and share, inserting backup rows and files
# that are not yet in the database, and finally refresh the full-text index.
484 my @hosts = keys %{$hosts};
# -h "h1 h2": build an anchored alternation so only the listed hosts import.
488 if ( exists $opt{h} ) {
489 $host_regex = $opt{h};
490 $host_regex =~ s/\s+/|/g;
491 $host_regex = '^' . $host_regex . '$';
494 foreach my $host_key (@hosts) {
496 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# BUGFIX: was `=~`, which skipped exactly the hosts that MATCH the -h list,
# inverting the documented behavior ("import just single HostList hosts").
# Skip hosts that do NOT match the requested host regex.
498 next if $host_regex && $hostname !~ m/$host_regex/;
# Look up the host's database id, inserting it on first sight.
500 $sth->{hosts_by_name}->execute($hostname);
502 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
503 $sth->{insert_hosts}->execute(
504 $hosts->{$host_key}->{'host'},
505 $hosts->{$host_key}->{'ip'}
508 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
512 # get backups for a host
513 my @backups = $bpc->BackupInfoRead($hostname);
514 my $incs = scalar @backups;
516 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
517 $hosts->{$host_key}->{'host'},
522 print $host_header unless ($opt{q});
527 foreach my $backup (@backups) {
# -m num: stop after importing the requested number of increments.
530 last if (defined $opt{m} && $inc_nr > $opt{m});
532 my $backupNum = $backup->{'num'};
533 my @backupShares = ();
535 my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
536 $hosts->{$host_key}->{'host'},
537 $inc_nr, $incs, $backupNum,
538 $backup->{type} || '?',
539 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
540 strftime($t_fmt,localtime($backup->{startTime})),
541 fmt_time($backup->{endTime} - $backup->{startTime})
543 print $share_header unless ($opt{q});
545 my $files = BackupPC::View->new($bpc, $hostname, \@backups, { only_first => 1 });
547 foreach my $share ($files->shareList($backupNum)) {
551 $shareID = getShareID($share, $hostID, $hostname);
553 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
554 my ($count) = $sth->{backups_count}->fetchrow_array();
555 # skip if already in database!
556 next if ($count > 0);
558 # dump host and share header for -q
562 $host_header = undef;
568 print curr_time," ", $share;
# Insert the backup row with size = -1; the real size is set after recursion.
570 $sth->{insert_backups}->execute(
573 $backup->{'endTime'},
574 substr($backup->{'type'},0,4),
# Walk the share's directory tree, inserting new files and tallying counts.
578 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
581 $sth->{update_backups_size}->execute(
585 $backup->{'endTime'},
586 substr($backup->{'type'},0,4),
# Avoid division by zero when the share imported within one clock tick.
597 my $dur = (time() - $t) || 1;
598 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
600 ($size / 1024 / 1024),
# Full-text update failures are reported but must not abort the import.
606 eval { hest_update($hostID, $shareID, $backupNum) };
607 warn "ERROR: $@" if $@;
617 print "total duration: ",fmt_time(time() - $start_t),"\n";
# getShareID fragment: return the database id for ($hostID, $share),
# inserting a new shares row (with a "host/share" drop-down label) on miss.
623 my ($share, $hostID, $hostname) = @_;
625 $sth->{share_id} ||= $dbh->prepare(qq{
626 SELECT ID FROM shares WHERE hostID=? AND name=?
629 $sth->{share_id}->execute($hostID,$share);
631 my ($id) = $sth->{share_id}->fetchrow_array();
# Fast path: share already registered.
633 return $id if (defined($id));
635 $sth->{insert_share} ||= $dbh->prepare(qq{
# Human-readable "hostname/share" label with duplicate slashes collapsed.
641 my $drop_down = $hostname . '/' . $share;
642 $drop_down =~ s#//+#/#g;
644 $sth->{insert_share}->execute($hostID,$share, $drop_down);
645 return $dbh->last_insert_id(undef,undef,'shares',undef);
# found_in_db fragment: check whether a file row already exists (memoized via
# $beenThere), matching dates exactly or shifted +/- one hour to tolerate DST
# changes; inserts the file when no existing row is found.
653 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
# Memoized answer for this key — avoids repeated database round-trips.
655 return $beenThere->{$key} if (defined($beenThere->{$key}));
657 $sth->{file_in_db} ||= $dbh->prepare(qq{
659 WHERE shareID = ? and
662 ( date = ? or date = ? or date = ? )
# Three date candidates: exact, one hour earlier, one hour later (DST skew).
666 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
667 $sth->{file_in_db}->execute(@param);
668 my $rows = $sth->{file_in_db}->rows;
669 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
671 $beenThere->{$key}++;
# Only insert when no matching row exists.
673 $sth->{'insert_files'}->execute(@data) unless ($rows);
677 ####################################################
678 # recursing through filesystem structure #
679 # and returning flattened files list #
680 ####################################################
# Walk one directory of a backup share iteratively (explicit @stack instead of
# per-entry recursion), inserting files not yet in the database and returning
# (total_files, new_files, total_dirs, new_dirs, total_size) for the subtree.
# NOTE(review): the sub prototype ($$$$$$$$) declares 8 args but 7 are
# unpacked — prototypes do not validate args here anyway; confirm intent.
681 sub recurseDir($$$$$$$$) {
683 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
685 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# Counters accumulated for this directory and all subdirectories.
687 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
692 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
693 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
695 # first, add all the entries in current directory
696 foreach my $path_key (keys %{$filesInBackup}) {
697 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
702 $filesInBackup->{$path_key}->{'relPath'},
703 $filesInBackup->{$path_key}->{'mtime'},
704 $filesInBackup->{$path_key}->{'type'},
705 $filesInBackup->{$path_key}->{'size'}
708 my $key = join(" ", (
712 $filesInBackup->{$path_key}->{'mtime'},
713 $filesInBackup->{$path_key}->{'size'}
716 my $key_dst_prev = join(" ", (
720 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
721 $filesInBackup->{$path_key}->{'size'}
724 my $key_dst_next = join(" ", (
728 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
729 $filesInBackup->{$path_key}->{'size'}
734 ! defined($beenThere->{$key}) &&
735 ! defined($beenThere->{$key_dst_prev}) &&
736 ! defined($beenThere->{$key_dst_next}) &&
737 ! ($found = found_in_db($key, @data))
739 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
741 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
742 $new_dirs++ unless ($found);
743 print STDERR " dir\n" if ($debug >= 2);
745 $new_files++ unless ($found);
746 print STDERR " file\n" if ($debug >= 2);
748 $size += $filesInBackup->{$path_key}->{'size'} || 0;
751 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
754 my $full_path = $dir . '/' . $path_key;
755 push @stack, $full_path;
756 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
758 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
770 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Drain the deferred-subdirectory stack, recursing once per directory and
# folding each subtree's counters into this call's totals (on elided lines).
772 while ( my $dir = shift @stack ) {
773 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
774 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
783 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);