use lib "/usr/local/BackupPC/lib";

use Getopt::Long::Descriptive;
use Time::HiRes qw/time/;
use POSIX qw/strftime/;
use Data::Dump qw(dump);

use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK => 4096;
# one hour offset to compensate for daylight saving time changes
my $dst_offset = 60 * 60;
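# files whose mtime differs by exactly +/- $dst_offset are treated as the
# same file (see the three-key check in recurseDir and found_in_db below),
# e.g. mtime 1234567890 also matches 1234564290 and 1234571490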
my $pid_path = abs_path($0);
$pid_path =~ s/\W+/_/g;

my $pidfile = File::Pid->new({
	file => "/tmp/search_update.pid",

if (my $pid = $pidfile->running ) {
	die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
	$pidfile = File::Pid->new;

print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $index_node_url = $Conf{HyperEstraierIndex};

my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
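# SearchDSN is a plain DBI DSN; the schema created below is PostgreSQL
# (SERIAL columns, plpgsql triggers), so a typical value would look like
# dbi:Pg:dbname=backuppc (illustrative example, not read from any config)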
my ($opt,$usage) = describe_options(

	[ 'create|c',	"create database on first use" ],
	[ 'delete|d',	"delete database before import" ],
	[ 'max|m=i',	"import just max increments for one host" ],
	[ 'host|h=s@',	"import just host(s)" ],
	[ 'verbose|v:i', 'set verbosity (debug) level' ],
	[ 'index|i',	'update full text index' ],
	[ 'junk|j',	"update full text, don't check existing files" ],
	[ 'fast|f',	"don't do anything with full text index" ],
	[ 'quiet|q',	"be quiet for hosts without changes" ],
	[ 'help',	"show help" ],

print($usage->text), exit if $opt->help;
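# typical invocations (script name here is illustrative):
#   BackupPC_updatedb -c           # first run: create tables, indexes and triggers
#   BackupPC_updatedb -h foo -i    # re-import host foo, then update full text index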
77 warn "hosts: ",dump( $opt->host );
$new =~ s{^[\w\/]+/(\w+) }{$1 }; # strip path from process name
if ( $text =~ m/^\|/ ) {
	$new =~ s/\|.*/$text/ or $new .= " $text";

	$new =~ s/\s+.*/ $text/ or $new .= " $text";
my $t = shift || return;

my ($ss,$mm,$hh) = gmtime($t);
$out .= "${hh}h" if ($hh);
$out .= sprintf("%02d:%02d", $mm,$ss);
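# e.g. fmt_time(3725) returns "1h02:05", fmt_time(125) returns "02:05"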
return strftime($t_fmt,localtime());
my ($host_id, $share_id, $num) = @_;
my $skip_check = $opt->junk && print STDERR "Skipping check for existing files -- this should be used only with initial import\n";
print curr_time," updating fulltext:";

my $search = BackupPC::Search->search_module;

if (defined($host_id) && defined($share_id) && defined($num)) {

	@data = ( $host_id, $share_id, $num );

my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

my $sth = $dbh->prepare(qq{

		shares.name AS sname,
		-- shares.share AS sharename,
		files.backupnum AS backupnum,
		-- files.name AS filename,
		files.path AS filepath,

		files.shareid AS shareid,
		backups.date AS backup_date

		INNER JOIN shares ON files.shareID=shares.ID
		INNER JOIN hosts ON hosts.ID = shares.hostID
		INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID

$sth->execute(@data);
$results = $sth->rows;
166 print " - no new files\n";
my $t = shift || return;
my $iso = BackupPC::Lib::timeStamp($t);

while (my $row = $sth->fetchrow_hashref()) {
	next if $search->exists( $row );
	$search->add_doc( $row );

$offset += EST_CHUNK;

} while ($results == EST_CHUNK);
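# rows are indexed in chunks of EST_CHUNK (4096) via LIMIT/OFFSET to keep
# memory use bounded; the do/while loop stops at the first chunk that comes
# back smaller than EST_CHUNK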
my $dur = (time() - $t) || 1;
printf(" [%.2f/s dur: %s]\n",
205 if ( ( $opt->index || $opt->junk ) && !$opt->create ) {
207 print "force update of Hyper Estraier index ";
208 print "by -i flag" if ($opt->index);
209 print "by -j flag" if ($opt->junk);
my $index = shift || return;
my ($table,$col,$unique) = split(/:/, $index);

print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
$dbh->do(qq{ create $unique index $index on $table($col) });
225 print "creating tables...\n";
227 foreach my $sql ( split(/;/, qq{
229 ID SERIAL PRIMARY KEY,
230 name VARCHAR(30) NOT NULL,
234 create table shares (
235 ID SERIAL PRIMARY KEY,
236 hostID INTEGER NOT NULL references hosts(id),
237 name VARCHAR(30) NOT NULL,
238 share VARCHAR(200) NOT NULL
242 ID SERIAL PRIMARY KEY,
243 num INTEGER NOT NULL,
244 name VARCHAR(255) NOT NULL,
248 create table backups (
250 hostID INTEGER NOT NULL references hosts(id),
251 num INTEGER NOT NULL,
252 date integer NOT NULL,
253 type CHAR(4) not null,
254 shareID integer not null references shares(id),
255 size bigint not null,
256 inc_size bigint not null default -1,
257 inc_deleted boolean default false,
258 parts integer not null default 0,
262 create table backup_parts (
264 backup_id int references backups(id),
265 part_nr int not null check (part_nr > 0),
266 tar_size bigint not null check (tar_size > 0),
267 size bigint not null check (size > 0),
269 items int not null check (items > 0),
270 date timestamp default now(),
271 filename text not null,
277 shareID INTEGER NOT NULL references shares(id),
278 backupNum INTEGER NOT NULL,
279 name VARCHAR(255) NOT NULL,
280 path VARCHAR(255) NOT NULL,
281 date integer NOT NULL,
282 type INTEGER NOT NULL,
283 size bigint NOT NULL,
287 create table archive (
290 total_size bigint default -1,
292 username varchar(20) not null,
293 date timestamp default now(),
297 create table archive_parts (
298 archive_id int not null references archive(id) on delete cascade,
299 backup_id int not null references backups(id),
300 backup_part_id int not null references backup_parts(id),
301 primary key(archive_id, backup_id, backup_part_id)
304 create table archive_burned (
305 archive_id int references archive(id),
306 date timestamp default now(),
307 part int not null default 1,
308 copy int not null default 1,
309 iso_size bigint default -1
	-- report backups and corresponding dvd

	-- create view backups_on_dvds as

	--	hosts.name || ':' || shares.name as share,
	--	backups.num as num,
	--	backups.type as type,
	--	abstime(backups.date) as backup_date,
	--	backups.size as size,
	--	backups.inc_size as gzip_size,
	--	archive.id as archive_id,

	--	join shares on backups.shareid=shares.id
	--	join hosts on shares.hostid = hosts.id
	--	left outer join archive_backup on backups.id = archive_backup.backup_id
	--	left outer join archive on archive_backup.archive_id = archive.id
	--	where backups.parts > 0 and size > 0
	--	order by backups.date
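	-- note: the view above still joins archive_backup, which does not match
	-- the archive_parts table defined in this schema; presumably why it is
	-- left commented out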
338 print "creating indexes: ";
340 foreach my $index (qw(
353 archive_burned:archive_id
354 backup_parts:backup_id,part_nr:unique
359 print " creating sequence: ";
360 foreach my $seq (qw/dvd_nr/) {
362 $dbh->do( qq{ CREATE SEQUENCE $seq } );
365 print " creating triggers ";
366 $dbh->do( <<__END_OF_TRIGGER__ );
368 create or replace function backup_parts_check() returns trigger as '
374 -- raise notice ''old/new parts %/% backup_id %/%'', old.parts, new.parts, old.id, new.id;
375 if (TG_OP=''UPDATE'') then
377 b_parts := new.parts;
378 elsif (TG_OP = ''INSERT'') then
380 b_parts := new.parts;
382 b_counted := (select count(*) from backup_parts where backup_id = b_id);
383 -- raise notice ''backup % parts %'', b_id, b_parts;
384 if ( b_parts != b_counted ) then
385 raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
391 create trigger do_backup_parts_check
392 after insert or update or delete on backups
393 for each row execute procedure backup_parts_check();
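-- example: with three rows in backup_parts for backup 42,
--   update backups set parts = 2 where id = 42;
-- raises the exception above, while setting parts = 3 succeeds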
create or replace function backup_backup_parts_check() returns trigger as '

	if (TG_OP = ''INSERT'') then
		-- raise notice ''trigger: % backup_id %'', TG_OP, new.backup_id;
		b_id = new.backup_id;
		my_part_nr = new.part_nr;
		execute ''update backups set parts = parts + 1 where id = '' || b_id;
	elsif (TG_OP = ''DELETE'') then
		-- raise notice ''trigger: % backup_id %'', TG_OP, old.backup_id;
		b_id = old.backup_id;
		my_part_nr = old.part_nr;
		execute ''update backups set parts = parts - 1 where id = '' || b_id;

	calc_part := (select count(part_nr) from backup_parts where backup_id = b_id);
	if ( my_part_nr != calc_part ) then
		raise exception ''Update of backup_parts with backup_id % aborted, requested part_nr is % and calculated next is %'', b_id, my_part_nr, calc_part;

create trigger do_backup_backup_parts_check
	after insert or update or delete on backup_parts
	for each row execute procedure backup_backup_parts_check();
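-- example: parts must arrive in order; inserting part_nr 1 for backup 42
-- bumps backups.parts to 1 and passes (count = 1), while inserting
-- part_nr 3 next would abort because the calculated next part is 2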
## delete data before inserting ##

foreach my $table (qw(files dvds backups shares hosts)) {

	$dbh->do(qq{ DELETE FROM $table });
## insert new values ##

$hosts = $bpc->HostInfoRead();

$sth->{insert_hosts} = $dbh->prepare(qq{
	INSERT INTO hosts (name, IP) VALUES (?,?)

$sth->{hosts_by_name} = $dbh->prepare(qq{
	SELECT id FROM hosts WHERE name=?

$sth->{backups_count} = $dbh->prepare(qq{

	WHERE hostID=? AND num=? AND shareid=?

$sth->{insert_backups} = $dbh->prepare(qq{
	INSERT INTO backups (hostID, num, date, type, shareid, size)
	VALUES (?,?,?,?,?,-1)

$sth->{update_backups_size} = $dbh->prepare(qq{
	UPDATE backups SET size = ?
	WHERE hostID = ? and num = ? and date = ? and type = ? and shareid = ?

$sth->{insert_files} = $dbh->prepare(qq{

	(shareID, backupNum, name, path, date, type, size)
	VALUES (?,?,?,?,?,?,?)
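# all statements are prepared once up front and cached in $sth so the
# per-host, per-backup and per-file loops below never re-prepare them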
my @hosts = keys %{$hosts};

foreach my $host_key (@hosts) {

	my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

	# \Q...\E so hostnames with regex metacharacters compare literally
	next if $opt->host && ! grep { m/^\Q$hostname\E$/ } @{ $opt->host };
492 $sth->{hosts_by_name}->execute($hostname);
494 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
495 $sth->{insert_hosts}->execute(
496 $hosts->{$host_key}->{'host'},
497 $hosts->{$host_key}->{'ip'}
500 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
504 # get backups for a host
505 my @backups = $bpc->BackupInfoRead($hostname);
506 my $incs = scalar @backups;
508 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
509 $hosts->{$host_key}->{'host'},
514 print $host_header unless $opt->quiet;
	foreach my $backup (@backups) {

		last if defined $opt->max && $inc_nr > $opt->max;

		my $backupNum = $backup->{'num'};
		my @backupShares = ();

		my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
			$hosts->{$host_key}->{'host'},
			$inc_nr, $incs, $backupNum,
			$backup->{type} || '?',
			$backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
			strftime($t_fmt,localtime($backup->{startTime})),
			fmt_time($backup->{endTime} - $backup->{startTime})

		print $share_header unless $opt->quiet;
		status "$hostname $backupNum $share_header";

		my $files = BackupPC::View->new($bpc, $hostname, \@backups, { only_increment => 1 });

		foreach my $share ($files->shareList($backupNum)) {

			$shareID = getShareID($share, $hostID, $hostname);

			$sth->{backups_count}->execute($hostID, $backupNum, $shareID);
			my ($count) = $sth->{backups_count}->fetchrow_array();
			# skip if already in database!
			next if ($count > 0);
			# print host and share headers now (they were suppressed by -q)

			$host_header = undef;

			print curr_time," ", $share;
563 $sth->{insert_backups}->execute(
566 $backup->{'endTime'},
567 substr($backup->{'type'},0,4),
571 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
574 $sth->{update_backups_size}->execute(
578 $backup->{'endTime'},
579 substr($backup->{'type'},0,4),
590 my $dur = (time() - $t) || 1;
591 my $status = sprintf("%d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]",
593 ($size / 1024 / 1024),
598 status "$hostname $backupNum $status";
601 status "$hostname $backupNum full-text | indexing";
602 #eval { hest_update($hostID, $shareID, $backupNum) };
603 #warn "ERROR: $@" if $@;
604 hest_update($hostID, $shareID, $backupNum);
605 # eval breaks our re-try logic
615 print "total duration: ",fmt_time(time() - $start_t),"\n";
	my ($share, $hostID, $hostname) = @_;

	$sth->{share_id} ||= $dbh->prepare(qq{
		SELECT ID FROM shares WHERE hostID=? AND name=?

	$sth->{share_id}->execute($hostID,$share);

	my ($id) = $sth->{share_id}->fetchrow_array();

	return $id if (defined($id));

	$sth->{insert_share} ||= $dbh->prepare(qq{

	my $drop_down = $hostname . '/' . $share;
	$drop_down =~ s#//+#/#g;

	$sth->{insert_share}->execute($hostID,$share, $drop_down);
	return $dbh->last_insert_id(undef,undef,'shares',undef);
	my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

	return $beenThere->{$key} if (defined($beenThere->{$key}));

	$sth->{file_in_db} ||= $dbh->prepare(qq{

		WHERE shareID = ? and

			( date = ? or date = ? or date = ? )

	my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
	$sth->{file_in_db}->execute(@param);
	my $rows = $sth->{file_in_db}->rows;
	print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

	$beenThere->{$key}++;

	$sth->{'insert_files'}->execute(@data) unless ($rows);
####################################################
# recurse through filesystem structure and         #
# return a flattened list of files                 #
####################################################
sub recurseDir($$$$$$$) {

	my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
683 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
685 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
690 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
691 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
693 # first, add all the entries in current directory
694 foreach my $path_key (keys %{$filesInBackup}) {
695 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
700 $filesInBackup->{$path_key}->{'relPath'},
701 $filesInBackup->{$path_key}->{'mtime'},
702 $filesInBackup->{$path_key}->{'type'},
703 $filesInBackup->{$path_key}->{'size'}
706 my $key = join(" ", (
710 $filesInBackup->{$path_key}->{'mtime'},
711 $filesInBackup->{$path_key}->{'size'}
714 my $key_dst_prev = join(" ", (
718 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
719 $filesInBackup->{$path_key}->{'size'}
722 my $key_dst_next = join(" ", (
726 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
727 $filesInBackup->{$path_key}->{'size'}
732 ! defined($beenThere->{$key}) &&
733 ! defined($beenThere->{$key_dst_prev}) &&
734 ! defined($beenThere->{$key_dst_next}) &&
735 ! ($found = found_in_db($key, @data))
737 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
739 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
740 $new_dirs++ unless ($found);
741 print STDERR " dir\n" if ($debug >= 2);
743 $new_files++ unless ($found);
744 print STDERR " file\n" if ($debug >= 2);
746 $size += $filesInBackup->{$path_key}->{'size'} || 0;
749 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
752 my $full_path = $dir . '/' . $path_key;
753 push @stack, $full_path;
754 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
756 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
768 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
	while ( my $dir = shift @stack ) {
		my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
		print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);

	return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);