1 #!/usr/local/bin/perl -w
# BackupPC search updater: imports backup file lists into an SQL database
# and (optionally) a Hyper Estraier full-text index.
# NOTE(review): this excerpt is elided -- several original lines (other
# "use" statements, closing braces) are not visible here.
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
# BackupPC file-type constant for directories (used by recurseDir below)
17 use constant BPC_FTYPE_DIR => 5;
# number of rows fetched per batch when feeding the full-text indexer
18 use constant EST_CHUNK => 4096;
20 # offset (in seconds) of a 1h daylight-saving-time change; used to match
# file mtimes that may have shifted by one hour across a DST boundary
21 my $dst_offset = 60 * 60;
# derive a pidfile name from this script's absolute path (non-word chars
# collapsed to '_') so concurrent runs of the same script are detected
28 my $pid_path = abs_path($0);
29 $pid_path =~ s/\W+/_/g;
31 my $pidfile = new File::Pid({
32 file => "/tmp/$pid_path",
# refuse to start if another instance already holds the pidfile
35 if (my $pid = $pidfile->running ) {
36 die "$0 already running: $pid\n";
37 } elsif ($pidfile->pid ne $$) {
# NOTE(review): pidfile exists but records a different pid -- presumably
# stale; a fresh File::Pid is created for this process (write call elided)
39 $pidfile = new File::Pid;
41 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# strftime format used for all human-readable timestamps in this script
44 my $t_fmt = '%Y-%m-%d %H:%M:%S';
# load BackupPC configuration and locate the pool directory
47 my $bpc = BackupPC::Lib->new || die;
48 my %Conf = $bpc->Conf();
49 my $TopDir = $bpc->TopDir();
# search-database connection settings come from BackupPC's config.pl;
# SearchDSN is mandatory, SearchUser optional
52 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
53 my $user = $Conf{SearchUser} || '';
# URL of the Hyper Estraier index node; undef disables full-text updates
55 my $index_node_url = $Conf{HyperEstraierIndex};
# AutoCommit is off: commits are issued explicitly elsewhere in the script
57 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line flags: -c create DB, -d delete before import,
# -m limit increments per host, -v verbosity, -i/-j full-text index modes,
# -f skip full-text index, -q quiet.  On bad options the usage text below
# is printed (the surrounding die/heredoc delimiters are not visible in
# this excerpt, so no comments are inserted inside it).
61 if ( !getopts("cdm:v:ijfq", \%opt ) ) {
63 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
66 -c create database on first use
67 -d delete database before import
68 -m num import just num increments for one host
69 -v num set verbosity (debug) level (default $debug)
70 -i update Hyper Estraier full text index
71 -j update full text, don't check existing files
72 -f don't do anything with full text index
73 -q be quiet for hosts without changes
75 Option -j is variation on -i. It will allow faster initial creation
76 of full-text index from existing database.
78 Option -f will create database which is out of sync with full text index. You
79 will have to re-run $0 with -i to fix it.
86 print "Debug level at $opt{v}\n";
89 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
90 $index_node_url = undef;
# Body of a duration formatter (its "sub fmt_time {" line is not visible
# in this excerpt): renders a duration in seconds as "[Hh]MM:SS".
96 my $t = shift || return;
# gmtime is used purely to decompose the duration into h/m/s fields;
# hours are printed only when non-zero
98 my ($ss,$mm,$hh) = gmtime($t);
99 $out .= "${hh}h" if ($hh);
100 $out .= sprintf("%02d:%02d", $mm,$ss);
# NOTE(review): the line below belongs to a second helper (curr_time,
# header elided) that returns the current local time formatted with $t_fmt
105 return strftime($t_fmt,localtime());
# Body of the full-text index updater (its "sub hest_update {" line is not
# visible here).  Feeds file rows for (host, share, backup number) -- or
# all new rows when called without arguments -- into Hyper Estraier in
# batches of EST_CHUNK.
112 my ($host_id, $share_id, $num) = @_;
# -j mode: skip the per-URI existence check.  The print's return value (1)
# is what actually lands in $skip_check via the && chain.
114 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
# bail out early unless an http:// index node URL is configured
116 unless ($index_node_url && $index_node_url =~ m#^http://#) {
117 print STDERR "HyperEstraier support not enabled or index node invalid\n" if ($debug);
122 print curr_time," updating Hyper Estraier:";
129 if ($index_node_url) {
130 print " opening index $index_node_url";
# lazily create and cache the node handle across calls
131 $hest_node ||= Search::Estraier::Node->new(
132 url => $index_node_url,
137 print " via node URL";
# when a specific backup is identified, restrict the query to it
146 if (defined($host_id) && defined($share_id) && defined($num)) {
153 @data = ( $host_id, $share_id, $num );
# page through results LIMIT/OFFSET style, EST_CHUNK rows at a time
156 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
# select file rows joined with their share/host/backup metadata
# (WHERE clause and statement terminator are not visible in this excerpt)
158 my $sth = $dbh->prepare(qq{
162 shares.name AS sname,
163 -- shares.share AS sharename,
164 files.backupnum AS backupnum,
165 -- files.name AS filename,
166 files.path AS filepath,
170 files.shareid AS shareid,
171 backups.date AS backup_date
173 INNER JOIN shares ON files.shareID=shares.ID
174 INNER JOIN hosts ON hosts.ID = shares.hostID
175 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
180 $sth->execute(@data);
181 $results = $sth->rows;
184 print " - no new files\n";
# NOTE(review): the two lines below appear to belong to a nested date
# helper (fmt_date?) whose "sub" line is elided from this excerpt
191 my $t = shift || return;
192 my $iso = BackupPC::Lib::timeStamp($t);
197 while (my $row = $sth->fetchrow_hashref()) {
# document URI: host:share#backupnum path
199 my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
200 if (! $skip_check && $hest_node) {
201 my $id = $hest_node->uri_to_id($uri);
# NOTE(review): skips only when uri_to_id returned -1 -- verify against
# Search::Estraier semantics (-1 usually means "not found"); this
# condition looks like it may be inverted
202 next if ($id && $id == -1);
205 # create a document object
206 my $doc = Search::Estraier::Document->new;
208 # add attributes to the document object
209 $doc->add_attr('@uri', $uri);
# copy every selected column into a document attribute of the same name
211 foreach my $c (@{ $sth->{NAME} }) {
212 print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
213 $doc->add_attr($c, $row->{$c}) if ($defined($row->{$c}));
216 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
218 # add the body text to the document object
219 my $path = $row->{'filepath'};
220 $doc->add_text($path);
# insert a space after every character so the indexer can match arbitrary
# substrings of the path as hidden text
221 $path =~ s/(.)/$1 /g;
222 $doc->add_hidden_text($path);
224 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
226 # register the document object to the database
227 $hest_node->put_doc($doc) if ($hest_node);
234 $offset += EST_CHUNK;
# keep paging while a full chunk was returned (a short chunk means done)
236 } while ($results == EST_CHUNK);
# "|| 1" avoids division by zero in the rate printed below
238 my $dur = (time() - $t) || 1;
239 printf(" [%.2f/s dur: %s]\n",
# -i/-j without -c: force a full-text index refresh against the existing
# database (with -c the index is rebuilt as part of creation instead)
249 if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
251 print "force update of Hyper Estraier index ";
252 print "by -i flag" if ($opt{i});
253 print "by -j flag" if ($opt{j});
# Body of an index-creation helper (its "sub ... {" line is not visible).
# Takes a spec of the form "table:column[,column...][:unique]".
261 my $index = shift || return;
262 my ($table,$col,$unique) = split(/:/, $index);
# NOTE(review): $index still contains ':' here, which is not a valid SQL
# identifier character -- presumably a substitution happens on an elided
# line before it is used as the index name; verify in the full source
265 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
266 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema creation (runs under -c): builds the search database --
#   tables: hosts, shares, dvds(?), backups, files, archive,
#           archive_backup, archive_burned, backup_parts;
#   indexes from "table:column[:unique]" specs; a dvd_nr sequence; and two
#   PL/pgSQL triggers that keep backups.parts consistent with the number
#   of backup_parts rows per backup.
# The DDL below lives inside qq{}/heredoc string literals whose opening
# and closing lines are partly elided from this excerpt, so no comments
# are inserted between these lines.
269 print "creating tables...\n";
273 ID SERIAL PRIMARY KEY,
274 name VARCHAR(30) NOT NULL,
278 create table shares (
279 ID SERIAL PRIMARY KEY,
280 hostID INTEGER NOT NULL references hosts(id),
281 name VARCHAR(30) NOT NULL,
282 share VARCHAR(200) NOT NULL
286 ID SERIAL PRIMARY KEY,
287 num INTEGER NOT NULL,
288 name VARCHAR(255) NOT NULL,
292 create table backups (
294 hostID INTEGER NOT NULL references hosts(id),
295 num INTEGER NOT NULL,
296 date integer NOT NULL,
297 type CHAR(4) not null,
298 shareID integer not null references shares(id),
299 size bigint not null,
300 inc_size bigint not null default -1,
301 inc_deleted boolean default false,
302 parts integer not null default 0,
308 shareID INTEGER NOT NULL references shares(id),
309 backupNum INTEGER NOT NULL,
310 name VARCHAR(255) NOT NULL,
311 path VARCHAR(255) NOT NULL,
312 date integer NOT NULL,
313 type INTEGER NOT NULL,
314 size bigint NOT NULL,
318 create table archive (
321 total_size bigint default -1,
323 username varchar(20) not null,
324 date timestamp default now(),
328 create table archive_backup (
329 archive_id int not null references archive(id) on delete cascade,
330 backup_id int not null references backups(id),
331 primary key(archive_id, backup_id)
334 create table archive_burned (
335 archive_id int references archive(id),
336 date timestamp default now(),
337 part int not null default 1,
338 copy int not null default 1,
339 iso_size bigint default -1
342 create table backup_parts (
344 backup_id int references backups(id),
345 part_nr int not null check (part_nr > 0),
346 tar_size bigint not null check (tar_size > 0),
347 size bigint not null check (size > 0),
349 items int not null check (items > 0),
350 date timestamp default now(),
355 print "creating indexes: ";
357 foreach my $index (qw(
370 archive_burned:archive_id
371 backup_parts:backup_id,part_nr:unique
376 print " creating sequence: ";
377 foreach my $seq (qw/dvd_nr/) {
379 $dbh->do( qq{ CREATE SEQUENCE $seq } );
382 print " creating triggers ";
383 $dbh->do( <<__END_OF_TRIGGER__ );
385 create or replace function backup_parts_check() returns trigger as '
391 -- raise notice ''old/new parts %/% backup_id %/%'', old.parts, new.parts, old.id, new.id;
392 if (TG_OP=''UPDATE'') then
394 b_parts := new.parts;
395 elsif (TG_OP = ''INSERT'') then
397 b_parts := new.parts;
399 b_counted := (select count(*) from backup_parts where backup_id = b_id);
400 -- raise notice ''backup % parts %'', b_id, b_parts;
401 if ( b_parts != b_counted ) then
402 raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
408 create trigger do_backup_parts_check
409 after insert or update or delete on backups
410 for each row execute procedure backup_parts_check();
412 create or replace function backup_backup_parts_check() returns trigger as '
418 if (TG_OP = ''INSERT'') then
419 -- raise notice ''trigger: % backup_id %'', TG_OP, new.backup_id;
420 b_id = new.backup_id;
421 my_part_nr = new.part_nr;
422 execute ''update backups set parts = parts + 1 where id = '' || b_id;
423 elsif (TG_OP = ''DELETE'') then
424 -- raise notice ''trigger: % backup_id %'', TG_OP, old.backup_id;
425 b_id = old.backup_id;
426 my_part_nr = old.part_nr;
427 execute ''update backups set parts = parts - 1 where id = '' || b_id;
429 calc_part := (select count(part_nr) from backup_parts where backup_id = b_id);
430 if ( my_part_nr != calc_part ) then
431 raise exception ''Update of backup_parts with backup_id % aborted, requested part_nr is % and calulated next is %'', b_id, my_part_nr, calc_part;
437 create trigger do_backup_backup_parts_check
438 after insert or update or delete on backup_parts
439 for each row execute procedure backup_backup_parts_check();
449 ## delete data before inserting ##
# purge in child-before-parent order so foreign keys are not violated
# (presumably guarded by the -d flag on an elided line -- verify)
452 foreach my $table (qw(files dvds backups shares hosts)) {
454 $dbh->do(qq{ DELETE FROM $table });
461 ## insert new values ##
# read the BackupPC hosts file: hashref keyed by host
464 $hosts = $bpc->HostInfoRead();
# $sth is used as a cache of prepared statement handles, keyed by purpose
470 $sth->{insert_hosts} = $dbh->prepare(qq{
471 INSERT INTO hosts (name, IP) VALUES (?,?)
474 $sth->{hosts_by_name} = $dbh->prepare(qq{
475 SELECT ID FROM hosts WHERE name=?
478 $sth->{backups_count} = $dbh->prepare(qq{
481 WHERE hostID=? AND num=? AND shareid=?
# size is inserted as the sentinel -1 and fixed up by update_backups_size
# once the backup's files have been counted
484 $sth->{insert_backups} = $dbh->prepare(qq{
485 INSERT INTO backups (hostID, num, date, type, shareid, size)
486 VALUES (?,?,?,?,?,-1)
489 $sth->{update_backups_size} = $dbh->prepare(qq{
490 UPDATE backups SET size = ?
491 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
494 $sth->{insert_files} = $dbh->prepare(qq{
496 (shareID, backupNum, name, path, date, type, size)
497 VALUES (?,?,?,?,?,?,?)
# Main import loop: for every configured host, walk its backups and load
# any not-yet-imported (host, backup, share) combinations into the DB.
500 my @hosts = keys %{$hosts};
503 foreach my $host_key (@hosts) {
505 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# look up the host row; insert it on first sight
507 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
509 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
510 $sth->{insert_hosts}->execute(
511 $hosts->{$host_key}->{'host'},
512 $hosts->{$host_key}->{'ip'}
515 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
519 # get backups for a host
520 my @backups = $bpc->BackupInfoRead($hostname);
521 my $incs = scalar @backups;
# header is printed lazily so -q can stay silent for unchanged hosts
523 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
524 $hosts->{$host_key}->{'host'},
529 print $host_header unless ($opt{q});
534 foreach my $backup (@backups) {
# -m limits how many increments are imported per host
537 last if ($opt{m} && $inc_nr > $opt{m});
539 my $backupNum = $backup->{'num'};
540 my @backupShares = ();
542 my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
543 $hosts->{$host_key}->{'host'},
544 $inc_nr, $incs, $backupNum,
545 $backup->{type} || '?',
546 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
547 strftime($t_fmt,localtime($backup->{startTime})),
548 fmt_time($backup->{endTime} - $backup->{startTime})
550 print $share_header unless ($opt{q});
# BackupPC::View gives a merged view of this backup's file tree
552 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
553 foreach my $share ($files->shareList($backupNum)) {
557 $shareID = getShareID($share, $hostID, $hostID, $hostname);
# skip (host, backup, share) combinations already present in the DB
559 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
560 my ($count) = $sth->{backups_count}->fetchrow_array();
561 # skip if already in database!
562 next if ($count > 0);
564 # dump host and share header for -q
# undef-ing the header marks it as "already printed" for the -q path
568 $host_header = undef;
574 print curr_time," ", $share;
# insert the backup row with the -1 size placeholder (see prepare above)
576 $sth->{insert_backups}->execute(
579 $backup->{'endTime'},
580 substr($backup->{'type'},0,4),
# walk the share's tree, counting/inserting files, then record real size
584 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
587 $sth->{update_backups_size}->execute(
591 $backup->{'endTime'},
592 substr($backup->{'type'},0,4),
# "|| 1" guards the files/sec rate below against zero duration
603 my $dur = (time() - $t) || 1;
604 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
606 ($size / 1024 / 1024),
# refresh the full-text index only when this share contributed new entries
611 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
620 print "total duration: ",fmt_time(time() - $start_t),"\n";
# Body of getShareID (its "sub getShareID {" line is not visible): returns
# the shares.ID for (hostID, share name), inserting the row on first use.
626 my ($share, $hostID, $hostname) = @_;
628 $sth->{share_id} ||= $dbh->prepare(qq{
629 SELECT ID FROM shares WHERE hostID=? AND name=?
632 $sth->{share_id}->execute($hostID,$share);
634 my ($id) = $sth->{share_id}->fetchrow_array();
# fast path: share already known
636 return $id if (defined($id));
638 $sth->{insert_share} ||= $dbh->prepare(qq{
# display label "hostname/share" with duplicate slashes collapsed
644 my $drop_down = $hostname . '/' . $share;
645 $drop_down =~ s#//+#/#g;
647 $sth->{insert_share}->execute($hostID,$share, $drop_down);
648 return $dbh->last_insert_id(undef,undef,'shares',undef);
# Body of found_in_db (header elided): memoized check whether a file row
# already exists; when it does not, the file is inserted.  Dates match
# exactly or +/- $dst_offset so a 1h DST shift does not create duplicates.
656 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
# memoization: a key seen earlier in this run needs no DB round-trip
658 return $beenThere->{$key} if (defined($beenThere->{$key}));
660 $sth->{file_in_db} ||= $dbh->prepare(qq{
662 WHERE shareID = ? and
665 ( date = ? or date = ? or date = ? )
669 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
670 $sth->{file_in_db}->execute(@param);
671 my $rows = $sth->{file_in_db}->rows;
672 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
674 $beenThere->{$key}++;
# NOTE(review): @data (the insert_files bind values) is populated on an
# elided line -- presumably from this sub's arguments; verify in full source
676 $sth->{'insert_files'}->execute(@data) unless ($rows);
680 ####################################################
681 # recursing through filesystem structure and       #
682 # returning a flattened files list                 #
683 ####################################################
# NOTE(review): the prototype declares 8 scalar args but only 7 are
# unpacked and passed at the call sites visible here -- looks stale, and
# Perl prototypes are best avoided for argument checking anyway.
684 sub recurseDir($$$$$$$$) {
686 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
688 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# counters returned to the caller: total/new files, total/new dirs, bytes
690 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
695 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
# attributes of every entry in the current directory of this backup
696 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
698 # first, add all the entries in current directory
699 foreach my $path_key (keys %{$filesInBackup}) {
700 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
705 $filesInBackup->{$path_key}->{'relPath'},
706 $filesInBackup->{$path_key}->{'mtime'},
707 $filesInBackup->{$path_key}->{'type'},
708 $filesInBackup->{$path_key}->{'size'}
# three dedupe keys are built: the file's real mtime plus the same mtime
# shifted one hour either way, so DST changes do not produce duplicates
711 my $key = join(" ", (
715 $filesInBackup->{$path_key}->{'mtime'},
716 $filesInBackup->{$path_key}->{'size'}
719 my $key_dst_prev = join(" ", (
723 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
724 $filesInBackup->{$path_key}->{'size'}
727 my $key_dst_next = join(" ", (
731 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
732 $filesInBackup->{$path_key}->{'size'}
# only consult/insert into the DB when none of the three keys was seen
737 ! defined($beenThere->{$key}) &&
738 ! defined($beenThere->{$key_dst_prev}) &&
739 ! defined($beenThere->{$key_dst_next}) &&
740 ! ($found = found_in_db($key, @data))
742 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
# classify the entry: directory vs regular file counters
744 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
745 $new_dirs++ unless ($found);
746 print STDERR " dir\n" if ($debug >= 2);
748 $new_files++ unless ($found);
749 print STDERR " file\n" if ($debug >= 2);
751 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# subdirectories are queued on @stack instead of recursing immediately,
# so the current directory is fully processed first
754 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
757 my $full_path = $dir . '/' . $path_key;
758 push @stack, $full_path;
759 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
761 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
773 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# now descend into each queued subdirectory; the accumulation of the
# returned counters happens on lines not visible in this excerpt
775 while ( my $dir = shift @stack ) {
776 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
777 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
786 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);