4 use lib "/usr/local/BackupPC/lib";
10 use Getopt::Long::Descriptive;
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
16 use Data::Dump qw(dump);
18 use constant BPC_FTYPE_DIR => 5;
19 use constant EST_CHUNK => 4096;
21 # One-hour offset (in seconds) used to tolerate daylight-saving time
# shifts when comparing file mtimes against values already stored in the DB.
22 my $dst_offset = 60 * 60;
# Single-instance guard: derive a sanitized name from our own path,
# then use File::Pid to refuse to run if another copy is active.
29 my $pid_path = abs_path($0);
30 $pid_path =~ s/\W+/_/g;
# NOTE(review): indirect-object syntax ("new File::Pid") is discouraged;
# File::Pid->new({...}) is the conventional call form.
32 my $pidfile = new File::Pid({
33 file => "/tmp/search_update.pid",
# Another live process holds the pidfile -> abort.
36 if (my $pid = $pidfile->running ) {
37 die "$0 already running: $pid\n";
# Pidfile exists but records a different (stale) pid: re-create it for us.
# NOTE(review): 'ne' compares the pids as strings; '!=' would state the
# numeric intent more clearly (behavior is the same for pid values).
38 } elsif ($pidfile->pid ne $$) {
40 $pidfile = new File::Pid;
42 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# Timestamp format used by curr_time() and the per-backup log lines.
45 my $t_fmt = '%Y-%m-%d %H:%M:%S';
# Read the BackupPC configuration (config.pl) via its library.
48 my $bpc = BackupPC::Lib->new || die;
49 my %Conf = $bpc->Conf();
50 my $TopDir = $bpc->TopDir();
# Search database DSN is mandatory; user defaults to empty string.
53 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
54 my $user = $Conf{SearchUser} || '';
# Full-text index location (may be undef if indexing is disabled).
56 my $index_node_url = $Conf{HyperEstraierIndex};
# AutoCommit=0: all inserts below run inside explicit transactions;
# RaiseError=1 makes DBI die on SQL errors instead of returning undef.
58 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Command-line options; $opt is the parsed accessor object, $usage the
# auto-generated help text.
60 my ($opt,$usage) = describe_options(
62 [ 'create|c', "create database on first use" ],
63 [ 'delete|d', "delete database before import" ],
64 [ 'max|m=i', "import just max increments for one host" ],
# May be given multiple times (=s@ accumulates into an array ref).
65 [ 'host|h=s@', "import just host(s)" ],
# Optional integer value (v:i): plain -v sets level without a number.
67 [ 'verbose|v:i', 'set verbosity (debug) level' ],
68 [ 'index|i', 'update full text index' ],
69 [ 'junk|j', "update full text, don't check existing files" ],
70 [ 'fast|f', "don't do anything with full text index" ],
71 [ 'quiet|q', "be quiet for hosts without changes" ],
72 [ 'help', "show help" ],
# Show usage and exit when --help was requested.
75 print($usage->text), exit if $opt->help;
77 warn "hosts: ",dump( $opt->host );
# Fragment of the status helper (sub header not visible here): rewrites the
# process title so `ps` shows progress. Presumably $new starts as the current
# title and $text is the new status — TODO confirm against the full sub.
85 $new =~ s{^[\w\/]+/(\w+) }{$1 }; # strip path from process name
# Status text beginning with '|' replaces any previous '|...' suffix;
# otherwise it replaces everything after the first word.
86 if ( $text =~ m/^\|/ ) {
87 $new =~ s/\|.*/$text/ or $new .= " $text";
89 $new =~ s/\s+.*/ $text/ or $new .= " $text";
# fmt_time body: format a duration in seconds as [Hh]MM:SS.
# Returns immediately (undef/empty) for 0 or undef input.
95 my $t = shift || return;
# gmtime on the raw second count gives sec/min/hour of the duration.
# NOTE(review): day component is discarded — durations >= 24h wrap.
97 my ($ss,$mm,$hh) = gmtime($t);
98 $out .= "${hh}h" if ($hh);
99 $out .= sprintf("%02d:%02d", $mm,$ss);
104 return strftime($t_fmt,localtime());
# hest_update body (elided view): push newly imported files into the
# full-text index, paging through the files table EST_CHUNK rows at a time.
109 my ($host_id, $share_id, $num) = @_;
# With -j we skip the per-row existence check (intended for initial import
# only). 'print' returns true, so $skip_check is set iff --junk was given.
111 my $skip_check = $opt->junk && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
113 print curr_time," updating fulltext:";
# Pluggable search backend (e.g. Hyper Estraier) chosen by configuration.
120 my $search = BackupPC::Search->search_module;
# When called for one specific backup, restrict the query to it;
# otherwise the (unseen) else-branch presumably selects everything pending.
128 if (defined($host_id) && defined($share_id) && defined($num)) {
135 @data = ( $host_id, $share_id, $num );
# Page through results so huge file sets don't load into RAM at once.
138 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
140 my $sth = $dbh->prepare(qq{
144 shares.name AS sname,
145 -- shares.share AS sharename,
146 files.backupnum AS backupnum,
147 -- files.name AS filename,
148 files.path AS filepath,
152 files.shareid AS shareid,
153 backups.date AS backup_date
155 INNER JOIN shares ON files.shareID=shares.ID
156 INNER JOIN hosts ON hosts.ID = shares.hostID
157 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
162 $sth->execute(@data);
163 $results = $sth->rows;
166 print " - no new files\n";
# Inner helper: convert an epoch timestamp to the backend's ISO form.
173 my $t = shift || return;
174 my $iso = BackupPC::Lib::timeStamp($t);
# Index each row, skipping documents the backend already knows about.
179 while (my $row = $sth->fetchrow_hashref()) {
180 next if $search->exists( $row );
181 $search->add_doc( $row );
188 $offset += EST_CHUNK;
# A short (non-full) chunk means we've consumed the last page.
190 } while ($results == EST_CHUNK);
# Guard against division by zero when the run took under a second.
194 my $dur = (time() - $t) || 1;
195 printf(" [%.2f/s dur: %s]\n",
# -i / -j force a full-text index rebuild even when no new data was
# imported (unless we are doing initial --create).
205 if ( ( $opt->index || $opt->junk ) && !$opt->create ) {
207 print "force update of Hyper Estraier index ";
208 print "by -i flag" if ($opt->index);
209 print "by -j flag" if ($opt->junk);
# do_index body: create one index from a "table:column[,col...][:unique]"
# spec string (the trailing ":unique" lands in $unique and is interpolated
# into the CREATE statement).
217 my $index = shift || return;
218 my ($table,$col,$unique) = split(/:/, $index);
221 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
222 $dbh->do(qq{ create $unique index $index on $table($col) });
# --create: build the search schema from scratch — hosts/shares/backups/
# files tables, DVD-archive bookkeeping tables, a reporting view, indexes,
# the dvd_nr sequence, and two consistency-enforcing triggers.
# (The SQL below lives inside qq{} strings / a heredoc; left untouched.)
225 print "creating tables...\n";
229 ID SERIAL PRIMARY KEY,
230 name VARCHAR(30) NOT NULL,
234 create table shares (
235 ID SERIAL PRIMARY KEY,
236 hostID INTEGER NOT NULL references hosts(id),
237 name VARCHAR(30) NOT NULL,
238 share VARCHAR(200) NOT NULL
242 ID SERIAL PRIMARY KEY,
243 num INTEGER NOT NULL,
244 name VARCHAR(255) NOT NULL,
248 create table backups (
250 hostID INTEGER NOT NULL references hosts(id),
251 num INTEGER NOT NULL,
252 date integer NOT NULL,
253 type CHAR(4) not null,
254 shareID integer not null references shares(id),
255 size bigint not null,
256 inc_size bigint not null default -1,
257 inc_deleted boolean default false,
258 parts integer not null default 0,
264 shareID INTEGER NOT NULL references shares(id),
265 backupNum INTEGER NOT NULL,
266 name VARCHAR(255) NOT NULL,
267 path VARCHAR(255) NOT NULL,
268 date integer NOT NULL,
269 type INTEGER NOT NULL,
270 size bigint NOT NULL,
274 create table archive (
277 total_size bigint default -1,
279 username varchar(20) not null,
280 date timestamp default now(),
284 create table archive_backup (
285 archive_id int not null references archive(id) on delete cascade,
286 backup_id int not null references backups(id),
287 primary key(archive_id, backup_id)
290 create table archive_burned (
291 archive_id int references archive(id),
292 date timestamp default now(),
293 part int not null default 1,
294 copy int not null default 1,
295 iso_size bigint default -1
298 create table backup_parts (
300 backup_id int references backups(id),
301 part_nr int not null check (part_nr > 0),
302 tar_size bigint not null check (tar_size > 0),
303 size bigint not null check (size > 0),
305 items int not null check (items > 0),
306 date timestamp default now(),
310 -- report backups and corresponding dvd
312 create view backups_on_dvds as
315 hosts.name || ':' || shares.name as share,
317 backups.type as type,
318 abstime(backups.date) as backup_date,
319 backups.size as size,
320 backups.inc_size as gzip_size,
321 archive.id as archive_id,
324 join shares on backups.shareid=shares.id
325 join hosts on shares.hostid = hosts.id
326 left outer join archive_backup on backups.id = archive_backup.backup_id
327 left outer join archive on archive_backup.archive_id = archive.id
328 where backups.parts > 0 and size > 0
329 order by backups.date
# Indexes are described as "table:column[,col...][:unique]" specs consumed
# by do_index() above.
333 print "creating indexes: ";
335 foreach my $index (qw(
348 archive_burned:archive_id
349 backup_parts:backup_id,part_nr:unique
354 print " creating sequence: ";
355 foreach my $seq (qw/dvd_nr/) {
357 $dbh->do( qq{ CREATE SEQUENCE $seq } );
# Triggers keep backups.parts in sync with rows in backup_parts and abort
# updates that would leave the counts inconsistent (PL/pgSQL in heredoc).
360 print " creating triggers ";
361 $dbh->do( <<__END_OF_TRIGGER__ );
363 create or replace function backup_parts_check() returns trigger as '
369 -- raise notice ''old/new parts %/% backup_id %/%'', old.parts, new.parts, old.id, new.id;
370 if (TG_OP=''UPDATE'') then
372 b_parts := new.parts;
373 elsif (TG_OP = ''INSERT'') then
375 b_parts := new.parts;
377 b_counted := (select count(*) from backup_parts where backup_id = b_id);
378 -- raise notice ''backup % parts %'', b_id, b_parts;
379 if ( b_parts != b_counted ) then
380 raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
386 create trigger do_backup_parts_check
387 after insert or update or delete on backups
388 for each row execute procedure backup_parts_check();
390 create or replace function backup_backup_parts_check() returns trigger as '
396 if (TG_OP = ''INSERT'') then
397 -- raise notice ''trigger: % backup_id %'', TG_OP, new.backup_id;
398 b_id = new.backup_id;
399 my_part_nr = new.part_nr;
400 execute ''update backups set parts = parts + 1 where id = '' || b_id;
401 elsif (TG_OP = ''DELETE'') then
402 -- raise notice ''trigger: % backup_id %'', TG_OP, old.backup_id;
403 b_id = old.backup_id;
404 my_part_nr = old.part_nr;
405 execute ''update backups set parts = parts - 1 where id = '' || b_id;
407 calc_part := (select count(part_nr) from backup_parts where backup_id = b_id);
408 if ( my_part_nr != calc_part ) then
409 raise exception ''Update of backup_parts with backup_id % aborted, requested part_nr is % and calulated next is %'', b_id, my_part_nr, calc_part;
415 create trigger do_backup_backup_parts_check
416 after insert or update or delete on backup_parts
417 for each row execute procedure backup_backup_parts_check();
427 ## delete existing data before inserting (--delete) ##
# Tables are emptied children-first so foreign keys are never violated.
430 foreach my $table (qw(files dvds backups shares hosts)) {
432 $dbh->do(qq{ DELETE FROM $table });
439 ## insert new values ##
# Host list comes from BackupPC's own hosts file.
442 $hosts = $bpc->HostInfoRead();
# Prepare all statements once, up front; they are reused for every host.
448 $sth->{insert_hosts} = $dbh->prepare(qq{
449 INSERT INTO hosts (name, IP) VALUES (?,?)
# Look up an existing host row by name.
452 $sth->{hosts_by_name} = $dbh->prepare(qq{
453 SELECT id FROM hosts WHERE name=?
# Is this (host, backup num, share) already imported?
456 $sth->{backups_count} = $dbh->prepare(qq{
459 WHERE hostID=? AND num=? AND shareid=?
# New backups start with size -1; real size is filled in after the
# file walk by update_backups_size below.
462 $sth->{insert_backups} = $dbh->prepare(qq{
463 INSERT INTO backups (hostID, num, date, type, shareid, size)
464 VALUES (?,?,?,?,?,-1)
467 $sth->{update_backups_size} = $dbh->prepare(qq{
468 UPDATE backups SET size = ?
469 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
472 $sth->{insert_files} = $dbh->prepare(qq{
474 (shareID, backupNum, name, path, date, type, size)
475 VALUES (?,?,?,?,?,?,?)
# Main import loop: for every configured host, walk each backup increment
# and each share inside it, inserting any files not yet in the database.
478 my @hosts = keys %{$hosts};
481 foreach my $host_key (@hosts) {
483 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# Honor --host filtering (exact name match against each given host).
485 next if $opt->host && ! grep { m/^$hostname$/ } @{ $opt->host };
# Find or create the hosts row, remembering its id.
487 $sth->{hosts_by_name}->execute($hostname);
489 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
490 $sth->{insert_hosts}->execute(
491 $hosts->{$host_key}->{'host'},
492 $hosts->{$host_key}->{'ip'}
495 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
499 # get backups for a host
500 my @backups = $bpc->BackupInfoRead($hostname);
501 my $incs = scalar @backups;
# Header is printed lazily: kept here and emitted later (or suppressed
# entirely with --quiet when nothing changes for this host).
503 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
504 $hosts->{$host_key}->{'host'},
509 print $host_header unless $opt->quiet;
514 foreach my $backup (@backups) {
# --max limits how many increments per host are imported.
517 last if defined $opt->max && $inc_nr > $opt->max;
519 my $backupNum = $backup->{'num'};
520 my @backupShares = ();
522 my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
523 $hosts->{$host_key}->{'host'},
524 $inc_nr, $incs, $backupNum,
525 $backup->{type} || '?',
526 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
527 strftime($t_fmt,localtime($backup->{startTime})),
528 fmt_time($backup->{endTime} - $backup->{startTime})
530 print $share_header unless $opt->quiet;
531 status "$hostname $backupNum $share_header";
# View restricted to this increment only (not the merged backup view).
533 my $files = BackupPC::View->new($bpc, $hostname, \@backups, { only_increment => 1 });
535 foreach my $share ($files->shareList($backupNum)) {
539 $shareID = getShareID($share, $hostID, $hostname);
541 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
542 my ($count) = $sth->{backups_count}->fetchrow_array();
543 # skip if already in database!
544 next if ($count > 0);
546 # dump host and share header for -q
550 $host_header = undef;
556 print curr_time," ", $share;
# Insert the backups row (size -1), walk the share, then fix up size.
558 $sth->{insert_backups}->execute(
561 $backup->{'endTime'},
562 substr($backup->{'type'},0,4),
566 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
569 $sth->{update_backups_size}->execute(
573 $backup->{'endTime'},
574 substr($backup->{'type'},0,4),
# Avoid division by zero for sub-second share imports.
585 my $dur = (time() - $t) || 1;
586 my $status = sprintf("%d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]",
588 ($size / 1024 / 1024),
593 status "$hostname $backupNum $status";
596 status "$hostname $backupNum full-text | indexing";
597 #eval { hest_update($hostID, $shareID, $backupNum) };
598 #warn "ERROR: $@" if $@;
# Deliberately NOT wrapped in eval — see comment below.
599 hest_update($hostID, $shareID, $backupNum);
600 # eval breaks our re-try logic
610 print "total duration: ",fmt_time(time() - $start_t),"\n";
# getShareID body: return the shares.id for (host, share), inserting the
# row on first sight. Statements are prepared lazily and cached in %$sth.
616 my ($share, $hostID, $hostname) = @_;
618 $sth->{share_id} ||= $dbh->prepare(qq{
619 SELECT ID FROM shares WHERE hostID=? AND name=?
622 $sth->{share_id}->execute($hostID,$share);
624 my ($id) = $sth->{share_id}->fetchrow_array();
# Fast path: share already known.
626 return $id if (defined($id));
628 $sth->{insert_share} ||= $dbh->prepare(qq{
# Display label "hostname/share" with runs of slashes collapsed.
634 my $drop_down = $hostname . '/' . $share;
635 $drop_down =~ s#//+#/#g;
637 $sth->{insert_share}->execute($hostID,$share, $drop_down);
638 return $dbh->last_insert_id(undef,undef,'shares',undef);
# found_in_db body: decide whether a file row already exists, consulting a
# per-run cache ($beenThere) first, then the files table. Matches the mtime
# exactly or +/- one hour to tolerate DST shifts. Inserts the row when absent.
646 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
# Cache hit: we've already answered for this key during this run.
648 return $beenThere->{$key} if (defined($beenThere->{$key}));
650 $sth->{file_in_db} ||= $dbh->prepare(qq{
652 WHERE shareID = ? and
655 ( date = ? or date = ? or date = ? )
# Three date candidates: exact mtime, and one DST hour either side.
659 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
660 $sth->{file_in_db}->execute(@param);
661 my $rows = $sth->{file_in_db}->rows;
662 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
664 $beenThere->{$key}++;
# Not present yet: insert using the @data assembled by the caller.
666 $sth->{'insert_files'}->execute(@data) unless ($rows);
670 ####################################################
671 # recurse through the filesystem structure and      #
672 # return a flattened list of files                  #
673 ####################################################
# Walk one directory of a share (iteratively, via an explicit @stack) and
# insert/count its entries. Returns (nr_files, new_files, nr_dirs, new_dirs,
# total size in bytes).
#
# FIX: dropped the ($$$$$$$$) prototype. It declared 8 scalar arguments while
# the sub unpacks (and the recursive call below passes) only 7; since the
# recursive call is compiled after the prototype is visible, Perl would reject
# it with "Not enough arguments". Prototypes must not be used for argument
# checking in any case (see perlsub).
674 sub recurseDir {
676 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
678 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
680 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
# Attributes of every entry directly inside $dir for this increment.
685 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
686 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
688 # first, add all the entries in current directory
689 foreach my $path_key (keys %{$filesInBackup}) {
690 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
695 $filesInBackup->{$path_key}->{'relPath'},
696 $filesInBackup->{$path_key}->{'mtime'},
697 $filesInBackup->{$path_key}->{'type'},
698 $filesInBackup->{$path_key}->{'size'}
# Dedup keys: exact mtime plus one DST hour either side, mirroring the
# three-way date match in found_in_db().
701 my $key = join(" ", (
705 $filesInBackup->{$path_key}->{'mtime'},
706 $filesInBackup->{$path_key}->{'size'}
709 my $key_dst_prev = join(" ", (
713 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
714 $filesInBackup->{$path_key}->{'size'}
717 my $key_dst_next = join(" ", (
721 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
722 $filesInBackup->{$path_key}->{'size'}
# Only hit the database when no cache variant has been seen this run.
727 ! defined($beenThere->{$key}) &&
728 ! defined($beenThere->{$key_dst_prev}) &&
729 ! defined($beenThere->{$key_dst_next}) &&
730 ! ($found = found_in_db($key, @data))
732 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
# Count new dirs/files separately; $found means it was already in the DB.
734 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
735 $new_dirs++ unless ($found);
736 print STDERR " dir\n" if ($debug >= 2);
738 $new_files++ unless ($found);
739 print STDERR " file\n" if ($debug >= 2);
741 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Subdirectories are queued on @stack and walked after this level,
# keeping recursion depth bounded to one frame per stack entry.
744 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
747 my $full_path = $dir . '/' . $path_key;
748 push @stack, $full_path;
749 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
751 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
763 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Drain the queue: recurse once per collected subdirectory.
765 while ( my $dir = shift @stack ) {
766 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
767 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
776 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);