use lib "/usr/local/BackupPC/lib";
# modules required by the code below
use DBI;
use Cwd qw/abs_path/;
use File::Pid;
use Getopt::Long::Descriptive;
use Time::HiRes qw/time/;
use POSIX qw/strftime/;
use Data::Dump qw(dump);
use Data::Dumper;
use BackupPC::Lib;
use BackupPC::View;
use BackupPC::Search;
use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK => 4096;

# daylight saving time change offset for 1h
my $dst_offset = 60 * 60;
my $pid_path = abs_path($0);
$pid_path =~ s/\W+/_/g;

my $pidfile = new File::Pid({
    file => "/tmp/search_update.pid",
});

if (my $pid = $pidfile->running ) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    $pidfile->remove;   # stale pid file from an earlier run
    $pidfile = new File::Pid;
}

print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
$pidfile->write;        # record our pid for the running check above
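# load the BackupPC configuration; the search database (SearchDSN, SearchUser)
# and the optional full text index location are read from config.pl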
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $bpc = BackupPC::Lib->new || die "can't create BackupPC::Lib";
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $index_node_url = $Conf{HyperEstraierIndex};

my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
my ($opt,$usage) = describe_options(
    "%c %o",    # usage line format (assumed)
    [ 'create|c',    "create database on first use" ],
    [ 'delete|d',    "delete database before import" ],
    [ 'max|m=i',     "import just max increments for one host" ],
    [ 'host|h=s@',   "import just host(s)" ],
    [ 'verbose|v:i', 'set verbosity (debug) level' ],
    [ 'index|i',     'update full text index' ],
    [ 'junk|j',      "update full text, don't check existing files" ],
    [ 'fast|f',      "don't do anything with full text index" ],
    [ 'quiet|q',     "be quiet for hosts without changes" ],
    [ 'help',        "show help" ],
);

print($usage->text), exit if $opt->help;

warn "hosts: ",dump( $opt->host );
sub status {
    my $text = shift;
    my $new = $0;
    $new =~ s{^[\w\/]+/(\w+) }{$1 }; # strip path from process name
    if ( $text =~ m/^\|/ ) { $new =~ s/\|.*/$text/   or $new .= " $text"; }
    else                   { $new =~ s/\s+.*/ $text/ or $new .= " $text"; }
    $0 = $new;
}
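# format a duration in seconds as [Hh]MM:SS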
sub fmt_time {
    my $t = shift || return;
    my $out = "";
    my ($ss,$mm,$hh) = gmtime($t);
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm,$ss);
    return $out;
}
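# current local time formatted with $t_fmt, used as a log prefix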
sub curr_time { return strftime($t_fmt,localtime()); }
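# hest_update($host_id, $share_id, $num): add files of one backup to the full
# text index, fetching candidate rows from the database in chunks of EST_CHUNK
# and skipping documents already present in the index (unless -j was given)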
sub hest_update {

    my ($host_id, $share_id, $num) = @_;

    my $skip_check = $opt->junk && print STDERR "Skipping check for existing files -- this should be used only with initial import\n";

    print curr_time," updating fulltext:";

    my $search = BackupPC::Search->search_module;

    if (defined($host_id) && defined($share_id) && defined($num)) {
        @data = ( $host_id, $share_id, $num );

    my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

    my $sth = $dbh->prepare(qq{
        shares.name AS sname,
        -- shares.share AS sharename,
        files.backupnum AS backupnum,
        -- files.name AS filename,
        files.path AS filepath,
        files.shareid AS shareid,
        backups.date AS backup_date
        INNER JOIN shares ON files.shareID=shares.ID
        INNER JOIN hosts ON hosts.ID = shares.hostID
        INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID

    $sth->execute(@data);
    $results = $sth->rows;

    print " - no new files\n";

    my $t = shift || return;
    my $iso = BackupPC::Lib::timeStamp($t);

    while (my $row = $sth->fetchrow_hashref()) {
        next if $search->exists( $row );
        $search->add_doc( $row );

    $offset += EST_CHUNK;

    } while ($results == EST_CHUNK);

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
if ( ( $opt->index || $opt->junk ) && !$opt->create ) {
    print "force update of Hyper Estraier index ";
    print "by -i flag" if ($opt->index);
    print "by -j flag" if ($opt->junk);
my $index = shift || return;
my ($table,$col,$unique) = split(/:/, $index);
$unique ||= '';
$index =~ s/\W+/_/g;    # turn the spec into a legal index name
print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
$dbh->do(qq{ create $unique index $index on $table($col) });
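# e.g. the spec backup_parts:backup_id,part_nr:unique produces:
#   create unique index backup_parts_backup_id_part_nr_unique on backup_parts(backup_id,part_nr)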
225 print "creating tables...\n";
229 ID SERIAL PRIMARY KEY,
230 name VARCHAR(30) NOT NULL,
234 create table shares (
235 ID SERIAL PRIMARY KEY,
236 hostID INTEGER NOT NULL references hosts(id),
237 name VARCHAR(30) NOT NULL,
238 share VARCHAR(200) NOT NULL
242 ID SERIAL PRIMARY KEY,
243 num INTEGER NOT NULL,
244 name VARCHAR(255) NOT NULL,
248 create table backups (
250 hostID INTEGER NOT NULL references hosts(id),
251 num INTEGER NOT NULL,
252 date integer NOT NULL,
253 type CHAR(4) not null,
254 shareID integer not null references shares(id),
255 size bigint not null,
256 inc_size bigint not null default -1,
257 inc_deleted boolean default false,
258 parts integer not null default 0,
264 shareID INTEGER NOT NULL references shares(id),
265 backupNum INTEGER NOT NULL,
266 name VARCHAR(255) NOT NULL,
267 path VARCHAR(255) NOT NULL,
268 date integer NOT NULL,
269 type INTEGER NOT NULL,
270 size bigint NOT NULL,
274 create table archive (
277 total_size bigint default -1,
279 username varchar(20) not null,
280 date timestamp default now(),
284 create table archive_backup (
285 archive_id int not null references archive(id) on delete cascade,
286 backup_id int not null references backups(id),
287 primary key(archive_id, backup_id)
290 create table archive_burned (
291 archive_id int references archive(id),
292 date timestamp default now(),
293 part int not null default 1,
294 copy int not null default 1,
295 iso_size bigint default -1
298 create table backup_parts (
300 backup_id int references backups(id),
301 part_nr int not null check (part_nr > 0),
302 tar_size bigint not null check (tar_size > 0),
303 size bigint not null check (size > 0),
305 items int not null check (items > 0),
306 date timestamp default now(),
307 filename text not null,
-- report backups and corresponding dvd
create view backups_on_dvds as
    hosts.name || ':' || shares.name as share,
    backups.type as type,
    abstime(backups.date) as backup_date,
    backups.size as size,
    backups.inc_size as gzip_size,
    archive.id as archive_id,
join shares on backups.shareid=shares.id
join hosts on shares.hostid = hosts.id
left outer join archive_backup on backups.id = archive_backup.backup_id
left outer join archive on archive_backup.archive_id = archive.id
where backups.parts > 0 and size > 0
order by backups.date
334 print "creating indexes: ";
336 foreach my $index (qw(
349 archive_burned:archive_id
350 backup_parts:backup_id,part_nr:unique
355 print " creating sequence: ";
356 foreach my $seq (qw/dvd_nr/) {
358 $dbh->do( qq{ CREATE SEQUENCE $seq } );
361 print " creating triggers ";
362 $dbh->do( <<__END_OF_TRIGGER__ );
364 create or replace function backup_parts_check() returns trigger as '
370 -- raise notice ''old/new parts %/% backup_id %/%'', old.parts, new.parts, old.id, new.id;
371 if (TG_OP=''UPDATE'') then
373 b_parts := new.parts;
374 elsif (TG_OP = ''INSERT'') then
376 b_parts := new.parts;
378 b_counted := (select count(*) from backup_parts where backup_id = b_id);
379 -- raise notice ''backup % parts %'', b_id, b_parts;
380 if ( b_parts != b_counted ) then
381 raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
387 create trigger do_backup_parts_check
388 after insert or update or delete on backups
389 for each row execute procedure backup_parts_check();
391 create or replace function backup_backup_parts_check() returns trigger as '
397 if (TG_OP = ''INSERT'') then
398 -- raise notice ''trigger: % backup_id %'', TG_OP, new.backup_id;
399 b_id = new.backup_id;
400 my_part_nr = new.part_nr;
401 execute ''update backups set parts = parts + 1 where id = '' || b_id;
402 elsif (TG_OP = ''DELETE'') then
403 -- raise notice ''trigger: % backup_id %'', TG_OP, old.backup_id;
404 b_id = old.backup_id;
405 my_part_nr = old.part_nr;
406 execute ''update backups set parts = parts - 1 where id = '' || b_id;
408 calc_part := (select count(part_nr) from backup_parts where backup_id = b_id);
409 if ( my_part_nr != calc_part ) then
410 raise exception ''Update of backup_parts with backup_id % aborted, requested part_nr is % and calulated next is %'', b_id, my_part_nr, calc_part;
416 create trigger do_backup_backup_parts_check
417 after insert or update or delete on backup_parts
418 for each row execute procedure backup_backup_parts_check();
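# the two triggers keep backups.parts consistent with backup_parts: updates of
# backups are rejected unless parts matches the number of backup_parts rows,
# and inserts/deletes on backup_parts adjust the parent's parts counter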
## delete data before inserting ##

foreach my $table (qw(files dvds backups shares hosts)) {
    $dbh->do(qq{ DELETE FROM $table });
## insert new values ##

$hosts = $bpc->HostInfoRead();

$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT id FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*) FROM backups
    WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,-1)
});

$sth->{update_backups_size} = $dbh->prepare(qq{
    UPDATE backups SET size = ?
    WHERE hostID = ? and num = ? and date = ? and type = ? and shareid = ?
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
    (shareID, backupNum, name, path, date, type, size)
    VALUES (?,?,?,?,?,?,?)
});
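# main import loop: for every configured host (or only those given with --host)
# walk its backups and insert any shares, backups and files not yet in the database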
my @hosts = keys %{$hosts};

foreach my $host_key (@hosts) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    next if $opt->host && ! grep { m/^$hostname$/ } @{ $opt->host };

    $sth->{hosts_by_name}->execute($hostname);

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hosts->{$host_key}->{'host'},
            $hosts->{$host_key}->{'ip'}

        $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;

    my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
        $hosts->{$host_key}->{'host'},

    print $host_header unless $opt->quiet;

    foreach my $backup (@backups) {

        last if defined $opt->max && $inc_nr > $opt->max;

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hosts->{$host_key}->{'host'},
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt,localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})

        print $share_header unless $opt->quiet;
        status "$hostname $backupNum $share_header";

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, { only_increment => 1 });

        foreach my $share ($files->shareList($backupNum)) {

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            # dump host and share header for -q
                $host_header = undef;

            print curr_time," ", $share;

            $sth->{insert_backups}->execute(
                $backup->{'endTime'},
                substr($backup->{'type'},0,4),

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

            $sth->{update_backups_size}->execute(
                $backup->{'endTime'},
                substr($backup->{'type'},0,4),

            my $dur = (time() - $t) || 1;
            my $status = sprintf("%d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]",
                ($size / 1024 / 1024),

            status "$hostname $backupNum $status";

            status "$hostname $backupNum full-text | indexing";
            #eval { hest_update($hostID, $shareID, $backupNum) };
            #warn "ERROR: $@" if $@;
            hest_update($hostID, $shareID, $backupNum);
            # eval breaks our re-try logic

print "total duration: ",fmt_time(time() - $start_t),"\n";
sub getShareID {
    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID=? AND name=?
    });
    $sth->{share_id}->execute($hostID,$share);
    my ($id) = $sth->{share_id}->fetchrow_array();
    return $id if (defined($id));

    # insert statement restored to match the execute() arguments below
    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares (hostID, name, share) VALUES (?,?,?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID,$share, $drop_down);
    return $dbh->last_insert_id(undef,undef,'shares',undef);
}
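# check whether a file is already in the files table: mtime must match exactly
# or shifted by +/- $dst_offset (daylight saving time), results are cached in
# $beenThere, and files not found are inserted via the insert_files statement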
sub found_in_db {
    my @data = @_;
    shift @data;    # drop the lookup key; the rest matches the insert_files columns
    my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    # query restored from the parameter list below; the selected column is an assumption
    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            size = ? and
            ( date = ? or date = ? or date = ? )
        LIMIT 1
    });
    my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
    $sth->{file_in_db}->execute(@param);
    my $rows = $sth->{file_in_db}->rows;
    print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;
    $sth->{'insert_files'}->execute(@data) unless ($rows);
    return $rows;   # caller uses the row count as the "already found" flag
}
####################################################
# recurse through the filesystem structure and     #
# return a flattened list of files                 #
####################################################
sub recurseDir($$$$$$$) {

    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
    my @stack;  # directories queued for a later descent

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in current directory
    foreach my $path_key (keys %{$filesInBackup}) {

        print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);

        my @data = (
            $shareID,
            $backupNum,
            $path_key,
            $filesInBackup->{$path_key}->{'relPath'},
            $filesInBackup->{$path_key}->{'mtime'},
            $filesInBackup->{$path_key}->{'type'},
            $filesInBackup->{$path_key}->{'size'}
        );  # leading columns restored to match insert_files(shareID, backupNum, name, ...)

        my $key = join(" ", (
            $filesInBackup->{$path_key}->{'mtime'},
            $filesInBackup->{$path_key}->{'size'}

        my $key_dst_prev = join(" ", (
            $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
            $filesInBackup->{$path_key}->{'size'}

        my $key_dst_next = join(" ", (
            $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
            $filesInBackup->{$path_key}->{'size'}
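        # build de-duplication keys from file metadata: the exact mtime plus two
        # variants shifted by +/- $dst_offset, so a file whose timestamp moved one
        # hour across a daylight saving time change is still recognized as seen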
        my $found;
        if (
            ! defined($beenThere->{$key}) &&
            ! defined($beenThere->{$key_dst_prev}) &&
            ! defined($beenThere->{$key_dst_next}) &&
            ! ($found = found_in_db($key, @data))
        ) {
            print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

            if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                $new_dirs++ unless ($found);
                print STDERR " dir\n" if ($debug >= 2);
            } else {
                $new_files++ unless ($found);
                print STDERR " file\n" if ($debug >= 2);
            }
            $size += $filesInBackup->{$path_key}->{'size'} || 0;
        }

        if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {

            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);

            # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
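            # instead of recursing immediately (see the commented-out call above),
            # subdirectories are pushed onto @stack and descended into after all
            # entries of the current directory have been handled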
    print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

    while ( my $dir = shift @stack ) {
        my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
        print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
        # accumulate totals from the subdirectory
        $nr_files += $f; $new_files += $nf; $nr_dirs += $d; $new_dirs += $nd; $size += $s;
    }

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}