# NOTE(review): this file is a partial, line-numbered excerpt of a BackupPC
# search/update script; many original lines are elided, so comments below are
# hedged where surrounding context is missing.
# Prologue: module search path, timing helpers, and debug dumper.
4 use lib "/usr/local/BackupPC/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
16 use Data::Dump qw(dump);
# BPC_FTYPE_DIR: BackupPC file-type code used to distinguish directories.
# EST_CHUNK: page size used when feeding rows to the full-text indexer.
18 use constant BPC_FTYPE_DIR => 5;
19 use constant EST_CHUNK => 4096;
21 # daylight saving time change offset for 1h
22 my $dst_offset = 60 * 60;
# Sanitized absolute script path (non-word chars -> '_');
# NOTE(review): $pid_path is computed but the pidfile below uses a fixed
# /tmp path — confirm whether $pid_path was meant to be used.
29 my $pid_path = abs_path($0);
30 $pid_path =~ s/\W+/_/g;
32 my $pidfile = new File::Pid({
33 file => "/tmp/search_update.pid",
# Refuse to start if another instance holds the pidfile; otherwise (re)write it.
36 if (my $pid = $pidfile->running ) {
37 die "$0 already running: $pid\n";
38 } elsif ($pidfile->pid ne $$) {
40 $pidfile = new File::Pid;
42 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# Timestamp format for all printed dates (e.g. "2009-01-31 13:45:00").
45 my $t_fmt = '%Y-%m-%d %H:%M:%S';
# Connect to BackupPC and pull its configuration and storage root.
48 my $bpc = BackupPC::Lib->new || die;
49 my %Conf = $bpc->Conf();
50 my $TopDir = $bpc->TopDir();
# SearchDSN is mandatory; SearchUser defaults to the empty string.
53 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
54 my $user = $Conf{SearchUser} || '';
56 my $index_node_url = $Conf{HyperEstraierIndex};
# Single DB handle for the whole run; transactions are explicit (AutoCommit=0)
# and DBI errors throw (RaiseError=1).
58 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line switches; on failure, emit the usage text below.
62 if ( !getopts("cdm:v:ijfqh:", \%opt ) ) {
64 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
67 -c create database on first use
68 -d delete database before import
69 -m num import just num increments for one host
70 -h "h1 h2" import just single HostList hosts
71 -v num set verbosity (debug) level (default $debug)
72 -i update Hyper Estraier full text index
73 -j update full text, don't check existing files
74 -f don't do anything with full text index
75 -q be quiet for hosts without changes
77 Option -j is variation on -i. It will allow faster initial creation
78 of full-text index from existing database.
80 Option -f will create database which is out of sync with full text index. You
81 will have to re-run $0 with -i to fix it.
# Report verbosity; -f disables full-text updates by clearing the index URL.
88 print "Debug level at $opt{v}\n";
91 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
92 $index_node_url = undef;
# fmt_time fragment: format a duration in seconds as "[Hh]MM:SS".
# NOTE(review): the enclosing "sub fmt_time" line is elided from this excerpt.
98 my $t = shift || return;
100 my ($ss,$mm,$hh) = gmtime($t);
101 $out .= "${hh}h" if ($hh);
102 $out .= sprintf("%02d:%02d", $mm,$ss);
# curr_time fragment: current wall-clock time formatted with $t_fmt.
107 return strftime($t_fmt,localtime());
# hest_update fragment: push newly-inserted file rows into the full-text
# index in pages of EST_CHUNK rows.  With -j the per-row existence check is
# skipped (intended only for the initial bulk import).
112 my ($host_id, $share_id, $num) = @_;
114 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
116 print curr_time," updating fulltext:";
123 my $search = BackupPC::Search->search_module;
# Restrict the query to one (host, share, backup) triple when all are given.
131 if (defined($host_id) && defined($share_id) && defined($num)) {
138 @data = ( $host_id, $share_id, $num );
# Page through results with LIMIT/OFFSET so memory use stays bounded.
141 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
143 my $sth = $dbh->prepare(qq{
147 shares.name AS sname,
148 -- shares.share AS sharename,
149 files.backupnum AS backupnum,
150 -- files.name AS filename,
151 files.path AS filepath,
155 files.shareid AS shareid,
156 backups.date AS backup_date
158 INNER JOIN shares ON files.shareID=shares.ID
159 INNER JOIN hosts ON hosts.ID = shares.hostID
160 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
165 $sth->execute(@data);
166 $results = $sth->rows;
169 print " - no new files\n";
176 my $t = shift || return;
177 my $iso = BackupPC::Lib::timeStamp($t);
# Add each fetched row to the index, skipping documents already present
# (unless the -j skip_check path bypassed that above).
182 while (my $row = $sth->fetchrow_hashref()) {
183 next if $search->exists( $row );
184 $search->add_doc( $row );
190 $offset += EST_CHUNK;
# Keep paging until a short (non-full) chunk signals the last page.
192 } while ($results == EST_CHUNK);
196 my $dur = (time() - $t) || 1;
197 printf(" [%.2f/s dur: %s]\n",
# When -i/-j is given without -c, force a full-text pass over existing data.
207 if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
209 print "force update of Hyper Estraier index ";
210 print "by -i flag" if ($opt{i});
211 print "by -j flag" if ($opt{j});
# create-index helper fragment: index spec string is "table:column[:unique]".
219 my $index = shift || return;
220 my ($table,$col,$unique) = split(/:/, $index);
223 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
224 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema bootstrap (-c): create tables, a reporting view, indexes and a
# sequence.  The DDL lives in SQL strings, so no Perl comments are inserted
# between the SQL lines below.
227 print "creating tables...\n";
231 ID SERIAL PRIMARY KEY,
232 name VARCHAR(30) NOT NULL,
236 create table shares (
237 ID SERIAL PRIMARY KEY,
238 hostID INTEGER NOT NULL references hosts(id),
239 name VARCHAR(30) NOT NULL,
240 share VARCHAR(200) NOT NULL
244 ID SERIAL PRIMARY KEY,
245 num INTEGER NOT NULL,
246 name VARCHAR(255) NOT NULL,
250 create table backups (
252 hostID INTEGER NOT NULL references hosts(id),
253 num INTEGER NOT NULL,
254 date integer NOT NULL,
255 type CHAR(4) not null,
256 shareID integer not null references shares(id),
257 size bigint not null,
258 inc_size bigint not null default -1,
259 inc_deleted boolean default false,
260 parts integer not null default 0,
266 shareID INTEGER NOT NULL references shares(id),
267 backupNum INTEGER NOT NULL,
268 name VARCHAR(255) NOT NULL,
269 path VARCHAR(255) NOT NULL,
270 date integer NOT NULL,
271 type INTEGER NOT NULL,
272 size bigint NOT NULL,
276 create table archive (
279 total_size bigint default -1,
281 username varchar(20) not null,
282 date timestamp default now(),
286 create table archive_backup (
287 archive_id int not null references archive(id) on delete cascade,
288 backup_id int not null references backups(id),
289 primary key(archive_id, backup_id)
292 create table archive_burned (
293 archive_id int references archive(id),
294 date timestamp default now(),
295 part int not null default 1,
296 copy int not null default 1,
297 iso_size bigint default -1
300 create table backup_parts (
302 backup_id int references backups(id),
303 part_nr int not null check (part_nr > 0),
304 tar_size bigint not null check (tar_size > 0),
305 size bigint not null check (size > 0),
307 items int not null check (items > 0),
308 date timestamp default now(),
312 -- report backups and corresponding dvd
314 create view backups_on_dvds as
317 hosts.name || ':' || shares.name as share,
319 backups.type as type,
320 abstime(backups.date) as backup_date,
321 backups.size as size,
322 backups.inc_size as gzip_size,
323 archive.id as archive_id,
326 join shares on backups.shareid=shares.id
327 join hosts on shares.hostid = hosts.id
328 left outer join archive_backup on backups.id = archive_backup.backup_id
329 left outer join archive on archive_backup.archive_id = archive.id
330 where backups.parts > 0 and size > 0
331 order by backups.date
# Index specs below are "table:column[,column...][:unique]".
335 print "creating indexes: ";
337 foreach my $index (qw(
350 archive_burned:archive_id
351 backup_parts:backup_id,part_nr:unique
# dvd_nr sequence numbers burned DVDs independently of any table.
356 print " creating sequence: ";
357 foreach my $seq (qw/dvd_nr/) {
359 $dbh->do( qq{ CREATE SEQUENCE $seq } );
# PL/pgSQL triggers that keep backups.parts consistent with the actual row
# count in backup_parts (raising an exception on mismatch).  The trigger
# bodies are inside a heredoc, so no Perl comments are inserted below.
362 print " creating triggers ";
363 $dbh->do( <<__END_OF_TRIGGER__ );
365 create or replace function backup_parts_check() returns trigger as '
371 -- raise notice ''old/new parts %/% backup_id %/%'', old.parts, new.parts, old.id, new.id;
372 if (TG_OP=''UPDATE'') then
374 b_parts := new.parts;
375 elsif (TG_OP = ''INSERT'') then
377 b_parts := new.parts;
379 b_counted := (select count(*) from backup_parts where backup_id = b_id);
380 -- raise notice ''backup % parts %'', b_id, b_parts;
381 if ( b_parts != b_counted ) then
382 raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
388 create trigger do_backup_parts_check
389 after insert or update or delete on backups
390 for each row execute procedure backup_parts_check();
392 create or replace function backup_backup_parts_check() returns trigger as '
398 if (TG_OP = ''INSERT'') then
399 -- raise notice ''trigger: % backup_id %'', TG_OP, new.backup_id;
400 b_id = new.backup_id;
401 my_part_nr = new.part_nr;
402 execute ''update backups set parts = parts + 1 where id = '' || b_id;
403 elsif (TG_OP = ''DELETE'') then
404 -- raise notice ''trigger: % backup_id %'', TG_OP, old.backup_id;
405 b_id = old.backup_id;
406 my_part_nr = old.part_nr;
407 execute ''update backups set parts = parts - 1 where id = '' || b_id;
409 calc_part := (select count(part_nr) from backup_parts where backup_id = b_id);
410 if ( my_part_nr != calc_part ) then
411 raise exception ''Update of backup_parts with backup_id % aborted, requested part_nr is % and calulated next is %'', b_id, my_part_nr, calc_part;
417 create trigger do_backup_backup_parts_check
418 after insert or update or delete on backup_parts
419 for each row execute procedure backup_backup_parts_check();
429 ## delete data before inserting ##
# -d: wipe tables in FK-dependency order before a fresh import.
432 foreach my $table (qw(files dvds backups shares hosts)) {
434 $dbh->do(qq{ DELETE FROM $table });
441 ## insert new values ##
# Read BackupPC's host metadata (host name, IP, ...) keyed by host.
444 $hosts = $bpc->HostInfoRead();
# Prepared statements reused throughout the import loops below.
450 $sth->{insert_hosts} = $dbh->prepare(qq{
451 INSERT INTO hosts (name, IP) VALUES (?,?)
454 $sth->{hosts_by_name} = $dbh->prepare(qq{
455 SELECT ID FROM hosts WHERE name=?
458 $sth->{backups_count} = $dbh->prepare(qq{
461 WHERE hostID=? AND num=? AND shareid=?
464 $sth->{insert_backups} = $dbh->prepare(qq{
465 INSERT INTO backups (hostID, num, date, type, shareid, size)
466 VALUES (?,?,?,?,?,-1)
469 $sth->{update_backups_size} = $dbh->prepare(qq{
470 UPDATE backups SET size = ?
471 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
474 $sth->{insert_files} = $dbh->prepare(qq{
476 (shareID, backupNum, name, path, date, type, size)
477 VALUES (?,?,?,?,?,?,?)
480 my @hosts = keys %{$hosts};
# -h "h1 h2": build an anchored alternation regex from the host list.
484 if ( exists $opt{h} ) {
485 $host_regex = $opt{h};
486 $host_regex =~ s/\s+/|/g;
487 $host_regex = '^' . $host_regex . '$';
490 foreach my $host_key (@hosts) {
492 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# NOTE(review): this SKIPS hosts that MATCH the -h regex ("next if ... =~"),
# which contradicts the usage text "import just single HostList hosts";
# likely should be "!~" — confirm against upstream before changing.
494 next if $host_regex && $hostname =~ m/$host_regex/;
# Look up the host's DB id, inserting the host row first if unknown.
496 $sth->{hosts_by_name}->execute($hostname);
498 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
499 $sth->{insert_hosts}->execute(
500 $hosts->{$host_key}->{'host'},
501 $hosts->{$host_key}->{'ip'}
504 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
508 # get backups for a host
509 my @backups = $bpc->BackupInfoRead($hostname);
510 my $incs = scalar @backups;
512 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
513 $hosts->{$host_key}->{'host'},
518 print $host_header unless ($opt{q});
# Per-backup loop: insert any (host, share, backupNum) not yet in the DB,
# walk its file tree, then update the backup row's total size.
523 foreach my $backup (@backups) {
# -m N: stop after importing N increments for this host.
526 last if (defined $opt{m} && $inc_nr > $opt{m});
528 my $backupNum = $backup->{'num'};
529 my @backupShares = ();
531 my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
532 $hosts->{$host_key}->{'host'},
533 $inc_nr, $incs, $backupNum,
534 $backup->{type} || '?',
535 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
536 strftime($t_fmt,localtime($backup->{startTime})),
537 fmt_time($backup->{endTime} - $backup->{startTime})
539 print $share_header unless ($opt{q});
# only_first: take each file's first occurrence when walking the view.
541 my $files = BackupPC::View->new($bpc, $hostname, \@backups, { only_first => 1 });
543 foreach my $share ($files->shareList($backupNum)) {
547 $shareID = getShareID($share, $hostID, $hostname);
549 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
550 my ($count) = $sth->{backups_count}->fetchrow_array();
551 # skip if already in database!
552 next if ($count > 0);
554 # dump host and share header for -q
558 $host_header = undef;
564 print curr_time," ", $share;
# Insert the backup row with size -1 (placeholder); real size is set below
# once recurseDir has accumulated it.
566 $sth->{insert_backups}->execute(
569 $backup->{'endTime'},
570 substr($backup->{'type'},0,4),
574 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
577 $sth->{update_backups_size}->execute(
581 $backup->{'endTime'},
582 substr($backup->{'type'},0,4),
593 my $dur = (time() - $t) || 1;
594 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
596 ($size / 1024 / 1024),
# Index the new rows in full text; failures are logged but not fatal.
602 eval { hest_update($hostID, $shareID, $backupNum) };
603 warn "ERROR: $@" if $@;
613 print "total duration: ",fmt_time(time() - $start_t),"\n";
# getShareID fragment: return the DB id for (hostID, share), inserting the
# shares row (with a "hostname/share" display path) when it does not exist.
619 my ($share, $hostID, $hostname) = @_;
# Prepare lazily, once per process (||= keeps the cached handle).
621 $sth->{share_id} ||= $dbh->prepare(qq{
622 SELECT ID FROM shares WHERE hostID=? AND name=?
625 $sth->{share_id}->execute($hostID,$share);
627 my ($id) = $sth->{share_id}->fetchrow_array();
629 return $id if (defined($id));
631 $sth->{insert_share} ||= $dbh->prepare(qq{
# Collapse duplicate slashes so "host//share" becomes "host/share".
637 my $drop_down = $hostname . '/' . $share;
638 $drop_down =~ s#//+#/#g;
640 $sth->{insert_share}->execute($hostID,$share, $drop_down);
641 return $dbh->last_insert_id(undef,undef,'shares',undef);
# found_in_db fragment: memoized (via $beenThere) check whether a file row
# already exists; inserts it when absent.  The date matches exactly or at
# +/- $dst_offset (1h) to tolerate daylight-saving shifts in recorded mtimes.
649 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
651 return $beenThere->{$key} if (defined($beenThere->{$key}));
653 $sth->{file_in_db} ||= $dbh->prepare(qq{
655 WHERE shareID = ? and
658 ( date = ? or date = ? or date = ? )
# Bind order must match the placeholders above: share, path, size, 3 dates.
662 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
663 $sth->{file_in_db}->execute(@param);
664 my $rows = $sth->{file_in_db}->rows;
665 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
667 $beenThere->{$key}++;
669 $sth->{'insert_files'}->execute(@data) unless ($rows);
673 ####################################################
674 # recursing through filesystem structure and #
675 # and returning flattened files list #
676 ####################################################
# NOTE(review): the ($$$$$$$$) prototype declares 8 scalar slots but only 7
# arguments are unpacked below; Perl prototypes do not validate call sites
# anyway and are best removed (see Perl Best Practices).
# Returns (nr_files, new_files, nr_dirs, new_dirs, size) for $dir's subtree.
677 sub recurseDir($$$$$$$$) {
679 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
681 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
683 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
688 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
689 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
691 # first, add all the entries in current directory
692 foreach my $path_key (keys %{$filesInBackup}) {
693 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
698 $filesInBackup->{$path_key}->{'relPath'},
699 $filesInBackup->{$path_key}->{'mtime'},
700 $filesInBackup->{$path_key}->{'type'},
701 $filesInBackup->{$path_key}->{'size'}
# Three dedup keys: exact mtime plus +/- $dst_offset to absorb DST shifts.
704 my $key = join(" ", (
708 $filesInBackup->{$path_key}->{'mtime'},
709 $filesInBackup->{$path_key}->{'size'}
712 my $key_dst_prev = join(" ", (
716 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
717 $filesInBackup->{$path_key}->{'size'}
720 my $key_dst_next = join(" ", (
724 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
725 $filesInBackup->{$path_key}->{'size'}
# Only consult/insert into the DB when no variant of the key has been seen.
730 ! defined($beenThere->{$key}) &&
731 ! defined($beenThere->{$key_dst_prev}) &&
732 ! defined($beenThere->{$key_dst_next}) &&
733 ! ($found = found_in_db($key, @data))
735 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
737 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
738 $new_dirs++ unless ($found);
739 print STDERR " dir\n" if ($debug >= 2);
741 $new_files++ unless ($found);
742 print STDERR " file\n" if ($debug >= 2);
744 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Directories are queued on @stack and recursed after this loop finishes,
# keeping the recursion breadth-limited per level.
747 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
750 my $full_path = $dir . '/' . $path_key;
751 push @stack, $full_path;
752 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
754 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
766 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Recurse into each queued subdirectory, accumulating the counters.
768 while ( my $dir = shift @stack ) {
769 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
770 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
779 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);