4 use lib "/usr/local/BackupPC/lib";
10 use Getopt::Long::Descriptive;
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
16 use Data::Dump qw(dump);
# BPC_FTYPE_DIR: BackupPC's attrib "type" value for a directory entry
# (compared against $filesInBackup->{...}{type} in recurseDir below).
18 use constant BPC_FTYPE_DIR => 5;
# EST_CHUNK: batch size (LIMIT / OFFSET step) used when feeding rows to the
# full-text indexer in hest_update.
19 use constant EST_CHUNK => 4096;
21 # daylight saving time change offset for 1h
22 my $dst_offset = 60 * 60;
# Single-instance guard via a pid file.  NOTE(review): this view is a
# fragment -- some interior lines (constructor close, pidfile->write, etc.)
# are not visible here.
29 my $pid_path = abs_path($0);
# Sanitize the script path into an identifier-safe token.
30 $pid_path =~ s/\W+/_/g;
# NOTE(review): indirect-object syntax; File::Pid->new({...}) is preferred.
32 my $pidfile = new File::Pid({
33 file => "/tmp/search_update.pid",
# Refuse to start if another instance already holds the pid file.
36 if (my $pid = $pidfile->running ) {
37 die "$0 already running: $pid\n";
38 } elsif ($pidfile->pid ne $$) {
# Pid file exists but belongs to a dead/other process: recreate for $$.
40 $pidfile = new File::Pid;
42 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# strftime format used for all human-readable timestamps in this script.
45 my $t_fmt = '%Y-%m-%d %H:%M:%S';
# BackupPC library handle plus its configuration and top-level data dir.
48 my $bpc = BackupPC::Lib->new || die;
49 my %Conf = $bpc->Conf();
50 my $TopDir = $bpc->TopDir();
# Search database connection parameters come from BackupPC's config.pl.
53 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
54 my $user = $Conf{SearchUser} || '';
56 my $index_node_url = $Conf{HyperEstraierIndex};
# AutoCommit off: inserts are batched and committed explicitly elsewhere.
58 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Command-line options (Getopt::Long::Descriptive): $opt holds parsed
# values, $usage renders the help text.
60 my ($opt,$usage) = describe_options(
62 [ 'create|c', "create database on first use" ],
63 [ 'delete|d', "delete database before import" ],
64 [ 'max|m=i', "import just max increments for one host" ],
65 [ 'host|h=s@', "import just host(s)" ],
67 [ 'verbose|v:i', 'set verbosity (debug) level' ],
68 [ 'index|i', 'update full text index' ],
69 [ 'junk|j', "update full text, don't check existing files" ],
70 [ 'fast|f', "don't do anything with full text index" ],
71 [ 'quiet|q', "be quiet for hosts without changes" ],
72 [ 'help', "show help" ],
# Comma operator: print usage, then exit, when --help was given.
75 print($usage->text), exit if $opt->help;
# Debug dump of the --host list (goes to STDERR via warn).
77 warn "hosts: ",dump( $opt->host );
# NOTE(review): fragment of a process-title/status helper (its sub header is
# not visible here).  $new appears to be the $0-derived title, $text the
# status to append; presumably used to update what shows in `ps` -- confirm
# against the full file.
85 $new =~ s{^[\w\/]+/(\w+) }{$1 }; # strip path from process name
# Status lines beginning with '|' replace any previous '|'-suffix; plain
# text replaces everything after the first whitespace.
86 if ( $text =~ m/^\|/ ) {
87 $new =~ s/\|.*/$text/ or $new .= " $text";
89 $new =~ s/\s+.*/ $text/ or $new .= " $text";
# NOTE(review): fragments of two small time helpers (sub headers missing
# from this view).  The first formats a duration in seconds as "[Hh]MM:SS"
# using gmtime so no timezone offset is applied; the second returns "now"
# in $t_fmt.
95 my $t = shift || return;
97 my ($ss,$mm,$hh) = gmtime($t);
# Hours are only shown when non-zero.
98 $out .= "${hh}h" if ($hh);
99 $out .= sprintf("%02d:%02d", $mm,$ss);
# curr_time: current local time formatted with the shared $t_fmt.
104 return strftime($t_fmt,localtime());
# hest_update body (fragment): push file rows that are not yet in the
# full-text index, fetching them from SQL in EST_CHUNK-sized pages.
109 my ($host_id, $share_id, $num) = @_;
# With -j, skip the per-document existence check.  The `&& print` idiom
# makes $skip_check true (print returns 1) only when --junk was given.
# NOTE(review): "initital" typo lives in a runtime string -- fix belongs in
# a code pass, not this comment-only pass.
111 my $skip_check = $opt->junk && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
113 print curr_time," updating fulltext:";
# The concrete indexer backend (e.g. Hyper Estraier) is chosen by
# BackupPC::Search.
120 my $search = BackupPC::Search->search_module;
# When host/share/num are given, restrict the query to that one backup.
128 if (defined($host_id) && defined($share_id) && defined($num)) {
135 @data = ( $host_id, $share_id, $num );
# Paged fetch: EST_CHUNK rows per round, advancing $offset each loop.
138 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
140 my $sth = $dbh->prepare(qq{
144 shares.name AS sname,
145 -- shares.share AS sharename,
146 files.backupnum AS backupnum,
147 -- files.name AS filename,
148 files.path AS filepath,
152 files.shareid AS shareid,
153 backups.date AS backup_date
155 INNER JOIN shares ON files.shareID=shares.ID
156 INNER JOIN hosts ON hosts.ID = shares.hostID
157 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
162 $sth->execute(@data);
163 $results = $sth->rows;
164 print " - no new files\n";
# Force a full-text index update when -i or -j was given (unless we are
# creating the database from scratch, which indexes everything anyway).
205 if ( ( $opt->index || $opt->junk ) && !$opt->create ) {
207 print "force update of Hyper Estraier index ";
208 print "by -i flag" if ($opt->index);
209 print "by -j flag" if ($opt->junk);
# Index-creation helper (fragment; sub header not visible).  Spec format is
# "name:column[,column...][:unique]"; the third field, when present, is the
# literal word "unique" and is interpolated straight into the DDL.
217 my $index = shift || return;
218 my ($table,$col,$unique) = split(/:/, $index);
# Trailing "u" marks unique indexes in the progress output.
221 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
222 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema bootstrap (--create): the DDL below is one big qq{} string split
# on ';' and executed statement by statement.
# NOTE(review): split(/;/) would break any statement containing a literal
# semicolon -- safe for this DDL, but fragile if edited.
225 print "creating tables...\n";
227 foreach my $sql ( split(/;/, qq{
229 ID SERIAL PRIMARY KEY,
230 name VARCHAR(30) NOT NULL,
234 create table shares (
235 ID SERIAL PRIMARY KEY,
236 hostID INTEGER NOT NULL references hosts(id),
237 name VARCHAR(30) NOT NULL,
238 share VARCHAR(200) NOT NULL
242 ID SERIAL PRIMARY KEY,
243 num INTEGER NOT NULL,
244 name VARCHAR(255) NOT NULL,
248 create table backups (
250 hostID INTEGER NOT NULL references hosts(id),
251 num INTEGER NOT NULL,
252 date integer NOT NULL,
253 type CHAR(4) not null,
254 shareID integer not null references shares(id),
255 size bigint not null,
256 inc_size bigint not null default -1,
257 inc_deleted boolean default false,
258 parts integer not null default 0,
262 create table backup_parts (
264 backup_id int references backups(id),
265 part_nr int not null check (part_nr > 0),
266 tar_size bigint not null check (tar_size > 0),
267 size bigint not null check (size > 0),
269 items int not null check (items > 0),
270 date timestamp default now(),
271 filename text not null,
277 shareID INTEGER NOT NULL references shares(id),
278 backupNum INTEGER NOT NULL,
279 name VARCHAR(255) NOT NULL,
280 path VARCHAR(255) NOT NULL,
281 date integer NOT NULL,
282 type INTEGER NOT NULL,
283 size bigint NOT NULL,
287 create table archive (
290 total_size bigint default -1,
292 username varchar(20) not null,
293 date timestamp default now(),
297 create table archive_parts (
298 archive_id int not null references archive(id) on delete cascade,
299 backup_part_id int not null references backup_parts(id),
300 primary key(archive_id, backup_part_id)
303 create table archive_burned (
304 archive_id int references archive(id),
305 date timestamp default now(),
306 part int not null default 1,
307 copy int not null default 1,
308 iso_size bigint default -1
311 -- report backups and corresponding dvd
313 -- create view backups_on_dvds as
316 -- hosts.name || ':' || shares.name as share,
317 -- backups.num as num,
318 -- backups.type as type,
319 -- abstime(backups.date) as backup_date,
320 -- backups.size as size,
321 -- backups.inc_size as gzip_size,
322 -- archive.id as archive_id,
325 -- join shares on backups.shareid=shares.id
326 -- join hosts on shares.hostid = hosts.id
327 -- left outer join archive_backup on backups.id = archive_backup.backup_id
328 -- left outer join archive on archive_backup.archive_id = archive.id
329 -- where backups.parts > 0 and size > 0
330 -- order by backups.date
337 print "creating indexes: ";
# Index specs in "table:col[,col][:unique]" form, consumed by the helper
# that splits on ':' (see above).
339 foreach my $index (qw(
352 archive_burned:archive_id
353 backup_parts:backup_id,part_nr:unique
358 print " creating sequence: ";
# dvd_nr: global sequence used for numbering burned DVDs.
359 foreach my $seq (qw/dvd_nr/) {
361 $dbh->do( qq{ CREATE SEQUENCE $seq } );
# PL/pgSQL triggers keeping backups.parts consistent with the actual row
# count in backup_parts.  Everything from the heredoc on is SQL sent to the
# server verbatim, so no Perl comments may be interleaved below.
# NOTE(review): the second exception message contains the typo "calulated";
# it is a runtime string and must be fixed in a code pass, not here.
364 print " creating triggers ";
365 $dbh->do( <<__END_OF_TRIGGER__ );
367 create or replace function backup_parts_check() returns trigger as '
373 -- raise notice ''old/new parts %/% backup_id %/%'', old.parts, new.parts, old.id, new.id;
374 if (TG_OP=''UPDATE'') then
376 b_parts := new.parts;
377 elsif (TG_OP = ''INSERT'') then
379 b_parts := new.parts;
381 b_counted := (select count(*) from backup_parts where backup_id = b_id);
382 -- raise notice ''backup % parts %'', b_id, b_parts;
383 if ( b_parts != b_counted ) then
384 raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
390 create trigger do_backup_parts_check
391 after insert or update or delete on backups
392 for each row execute procedure backup_parts_check();
394 create or replace function backup_backup_parts_check() returns trigger as '
400 if (TG_OP = ''INSERT'') then
401 -- raise notice ''trigger: % backup_id %'', TG_OP, new.backup_id;
402 b_id = new.backup_id;
403 my_part_nr = new.part_nr;
404 execute ''update backups set parts = parts + 1 where id = '' || b_id;
405 elsif (TG_OP = ''DELETE'') then
406 -- raise notice ''trigger: % backup_id %'', TG_OP, old.backup_id;
407 b_id = old.backup_id;
408 my_part_nr = old.part_nr;
409 execute ''update backups set parts = parts - 1 where id = '' || b_id;
411 calc_part := (select count(part_nr) from backup_parts where backup_id = b_id);
412 if ( my_part_nr != calc_part ) then
413 raise exception ''Update of backup_parts with backup_id % aborted, requested part_nr is % and calulated next is %'', b_id, my_part_nr, calc_part;
419 create trigger do_backup_backup_parts_check
420 after insert or update or delete on backup_parts
421 for each row execute procedure backup_backup_parts_check();
431 ## delete data before inserting ##
# Table order matters: children (files, backups, shares) are cleared before
# the parents they reference via foreign keys.
434 foreach my $table (qw(files dvds backups shares hosts)) {
436 $dbh->do(qq{ DELETE FROM $table });
443 ## insert new values ##
# Host list straight from BackupPC's own hosts file.
446 $hosts = $bpc->HostInfoRead();
# Prepared statements cached in %$sth, reused across the host loop below.
# (The qq{} SQL bodies continue into lines not visible in this fragment.)
452 $sth->{insert_hosts} = $dbh->prepare(qq{
453 INSERT INTO hosts (name, IP) VALUES (?,?)
456 $sth->{hosts_by_name} = $dbh->prepare(qq{
457 SELECT id FROM hosts WHERE name=?
460 $sth->{backups_count} = $dbh->prepare(qq{
463 WHERE hostID=? AND num=? AND shareid=?
466 $sth->{insert_backups} = $dbh->prepare(qq{
467 INSERT INTO backups (hostID, num, date, type, shareid, size)
468 VALUES (?,?,?,?,?,-1)
471 $sth->{update_backups_size} = $dbh->prepare(qq{
472 UPDATE backups SET size = ?
473 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
476 $sth->{insert_files} = $dbh->prepare(qq{
478 (shareID, backupNum, name, path, date, type, size)
479 VALUES (?,?,?,?,?,?,?)
# Main import loop: for every BackupPC host, every increment, every share,
# insert any backups/files not yet in the search database.
482 my @hosts = keys %{$hosts};
485 foreach my $host_key (@hosts) {
487 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# Honor --host filtering.  NOTE(review): $hostname is interpolated into the
# pattern unquoted -- a hostname with regex metacharacters would misbehave;
# \Q$hostname\E would be safer.
489 next if $opt->host && ! grep { m/^$hostname$/ } @{ $opt->host };
# Look up (or lazily create) the host row, capturing its id.
491 $sth->{hosts_by_name}->execute($hostname);
493 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
494 $sth->{insert_hosts}->execute(
495 $hosts->{$host_key}->{'host'},
496 $hosts->{$host_key}->{'ip'}
499 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
503 # get backups for a host
504 my @backups = $bpc->BackupInfoRead($hostname);
505 my $incs = scalar @backups;
# Header is printed eagerly unless -q; with -q it is emitted later only if
# the host actually has changes (see $host_header = undef below).
507 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
508 $hosts->{$host_key}->{'host'},
513 print $host_header unless $opt->quiet;
# One pass per increment, bounded by --max when given.
518 foreach my $backup (@backups) {
521 last if defined $opt->max && $inc_nr > $opt->max;
523 my $backupNum = $backup->{'num'};
524 my @backupShares = ();
526 my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
527 $hosts->{$host_key}->{'host'},
528 $inc_nr, $incs, $backupNum,
529 $backup->{type} || '?',
530 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
531 strftime($t_fmt,localtime($backup->{startTime})),
532 fmt_time($backup->{endTime} - $backup->{startTime})
534 print $share_header unless $opt->quiet;
535 status "$hostname $backupNum $share_header";
# View restricted to files new in this increment (only_increment).
537 my $files = BackupPC::View->new($bpc, $hostname, \@backups, { only_increment => 1 });
539 foreach my $share ($files->shareList($backupNum)) {
543 $shareID = getShareID($share, $hostID, $hostname);
545 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
546 my ($count) = $sth->{backups_count}->fetchrow_array();
547 # skip if already in database!
548 next if ($count > 0);
550 # dump host and share header for -q
# Clear so the header is printed at most once per host under -q.
554 $host_header = undef;
560 print curr_time," ", $share;
# Insert the backup row with size=-1; the real size is filled in after the
# recursive file walk below.
562 $sth->{insert_backups}->execute(
565 $backup->{'endTime'},
566 substr($backup->{'type'},0,4),
# Walk the share's tree, inserting files and totalling counts/sizes.
570 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
573 $sth->{update_backups_size}->execute(
577 $backup->{'endTime'},
578 substr($backup->{'type'},0,4),
# Guard against division by zero for sub-second runs.
589 my $dur = (time() - $t) || 1;
590 my $status = sprintf("%d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]",
592 ($size / 1024 / 1024),
597 status "$hostname $backupNum $status";
600 status "$hostname $backupNum full-text | indexing";
601 #eval { hest_update($hostID, $shareID, $backupNum) };
602 #warn "ERROR: $@" if $@;
# Deliberately NOT wrapped in eval -- see comment below about retry logic.
603 hest_update($hostID, $shareID, $backupNum);
604 # eval breaks our re-try logic
614 print "total duration: ",fmt_time(time() - $start_t),"\n";
# getShareID body (fragment): return the shares.ID for ($hostID, $share),
# inserting the row on first sight.
620 my ($share, $hostID, $hostname) = @_;
# Statements are prepared lazily and cached (||=).
622 $sth->{share_id} ||= $dbh->prepare(qq{
623 SELECT ID FROM shares WHERE hostID=? AND name=?
626 $sth->{share_id}->execute($hostID,$share);
628 my ($id) = $sth->{share_id}->fetchrow_array();
# Fast path: share already known.
630 return $id if (defined($id));
632 $sth->{insert_share} ||= $dbh->prepare(qq{
638 my $drop_down = $hostname . '/' . $share;
# Collapse duplicate slashes in the "host/share" display path.
639 $drop_down =~ s#//+#/#g;
641 $sth->{insert_share}->execute($hostID,$share, $drop_down);
642 return $dbh->last_insert_id(undef,undef,'shares',undef);
# found_in_db body (fragment): memoized check whether a file row already
# exists, matching date exactly or +/- one hour to tolerate DST shifts.
650 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
# Memoization: a prior verdict for this key short-circuits the query.
652 return $beenThere->{$key} if (defined($beenThere->{$key}));
654 $sth->{file_in_db} ||= $dbh->prepare(qq{
656 WHERE shareID = ? and
659 ( date = ? or date = ? or date = ? )
663 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
664 $sth->{file_in_db}->execute(@param);
# NOTE(review): per DBI docs, rows() is not reliable for SELECT on all
# drivers -- this works with DBD::Pg but verify if the DSN can vary.
665 my $rows = $sth->{file_in_db}->rows;
666 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
# Remember the key, then insert the file only when it was not found.
668 $beenThere->{$key}++;
670 $sth->{'insert_files'}->execute(@data) unless ($rows);
674 ####################################################
675 # recursing through filesystem structure and #
676 # and returning flattened files list #
677 ####################################################
# NOTE(review): the ($$$$$$$$) prototype declares 8 scalars but only 7 args
# are unpacked; Perl prototypes don't validate anyway and are best removed.
678 sub recurseDir($$$$$$$$) {
680 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
682 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# Running totals returned to the caller: (files, new files, dirs, new dirs,
# total size in bytes).
684 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
689 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
# Attributes of every entry in the current directory of this backup.
690 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
692 # first, add all the entries in current directory
693 foreach my $path_key (keys %{$filesInBackup}) {
694 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
699 $filesInBackup->{$path_key}->{'relPath'},
700 $filesInBackup->{$path_key}->{'mtime'},
701 $filesInBackup->{$path_key}->{'type'},
702 $filesInBackup->{$path_key}->{'size'}
# Three dedup keys per entry: exact mtime plus mtime shifted one hour each
# way, so a DST change doesn't make an unchanged file look new.
705 my $key = join(" ", (
709 $filesInBackup->{$path_key}->{'mtime'},
710 $filesInBackup->{$path_key}->{'size'}
713 my $key_dst_prev = join(" ", (
717 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
718 $filesInBackup->{$path_key}->{'size'}
721 my $key_dst_next = join(" ", (
725 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
726 $filesInBackup->{$path_key}->{'size'}
# Only hit the database when none of the three keys was seen this run.
731 ! defined($beenThere->{$key}) &&
732 ! defined($beenThere->{$key_dst_prev}) &&
733 ! defined($beenThere->{$key_dst_next}) &&
734 ! ($found = found_in_db($key, @data))
736 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
738 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
739 $new_dirs++ unless ($found);
740 print STDERR " dir\n" if ($debug >= 2);
742 $new_files++ unless ($found);
743 print STDERR " file\n" if ($debug >= 2);
745 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Directories are deferred onto a stack instead of recursing immediately,
# keeping the recursion depth at one level per call site.
748 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
751 my $full_path = $dir . '/' . $path_key;
752 push @stack, $full_path;
753 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
755 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
767 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Drain the stack, recursing into each subdirectory and accumulating its
# counters.  NOTE(review): `while (my $dir = shift @stack)` stops early if
# a path ever evaluates false (e.g. the literal string "0") -- unlikely but
# worth confirming.
769 while ( my $dir = shift @stack ) {
770 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
771 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
780 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);