1 #!/usr/local/bin/perl -w
# Import BackupPC backups into the search database and, optionally,
# into the Hyper Estraier full-text index.
4 use lib "__INSTALLDIR__/lib";
# Time::HiRes replaces time() with a sub-second version (used for durations).
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
# BackupPC attribute type code for directory entries.
17 use constant BPC_FTYPE_DIR => 5;
# Rows fetched per batch when feeding the full-text indexer.
18 use constant EST_CHUNK => 100000;
20 # daylight saving time change offset for 1h
21 my $dst_offset = 60 * 60;
# Derive a pidfile name from this script's absolute path so two copies of
# the same installed script cannot run concurrently.
28 my $pid_path = abs_path($0);
29 $pid_path =~ s/\W+/_/g;
31 my $pidfile = new File::Pid({
32 file => "/tmp/$pid_path",
# Abort when another instance already holds the pidfile; otherwise claim it.
35 if (my $pid = $pidfile->running ) {
36 die "$0 already running: $pid\n";
37 } elsif ($pidfile->pid ne $$) {
39 $pidfile = new File::Pid;
41 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# strftime() pattern for all timestamps printed by this script.
44 my $t_fmt = '%Y-%m-%d %H:%M:%S';
47 my $bpc = BackupPC::Lib->new || die;
48 my %Conf = $bpc->Conf();
49 my $TopDir = $bpc->TopDir();
# Search database DSN is mandatory; the DB user defaults to empty.
52 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
53 my $user = $Conf{SearchUser} || '';
# May be undef when full-text indexing is not configured.
55 my $index_node_url = $Conf{HyperEstraierIndex};
# RaiseError turns DB errors into die(); AutoCommit is off, so commits are
# presumably issued explicitly elsewhere — TODO confirm against full file.
57 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line switches; on parse failure emit the usage text below.
61 if ( !getopts("cdm:v:ijf", \%opt ) ) {
63 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
66 -c create database on first use
67 -d delete database before import
68 -m num import just num increments for one host
69 -v num set verbosity (debug) level (default $debug)
70 -i update Hyper Estraier full text index
71 -j update full text, don't check existing files
72 -f don't do anything with full text index
74 Option -j is variation on -i. It will allow faster initial creation
75 of full-text index from existing database.
77 Option -f will create database which is out of sync with full text index. You
78 will have to re-run $0 with -i to fix it.
85 print "Debug level at $opt{v}\n";
# -f: disable full-text updates entirely; warn because database and index
# will diverge until the user re-runs with -j.
88 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
89 $index_node_url = undef;
# Format a duration in seconds as [Nh]MM:SS; returns early on 0/undef.
# gmtime() is used so the duration is not shifted by the local timezone.
95 my $t = shift || return;
97 my ($ss,$mm,$hh) = gmtime($t);
98 $out .= "${hh}h" if ($hh);
99 $out .= sprintf("%02d:%02d", $mm,$ss);
# Current local wall-clock time rendered with the shared $t_fmt pattern.
104 return strftime($t_fmt,localtime());
# Announce which signal triggered the shutdown path.
113 print "\nCaught a SIG$sig--syncing database and shutting down\n";
# Install the handler for interactive interrupt and quit.
120 $SIG{'INT'} = \&signal;
121 $SIG{'QUIT'} = \&signal;
# Incremental Hyper Estraier update: index file rows for one
# (host, share, backup number), or everything new when ids are undef.
125 my ($host_id, $share_id, $num) = @_;
# -j: trust that no document exists yet and skip the per-URI lookup.
127 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
129 unless (defined($index_node_url)) {
130 print STDERR "HyperEstraier support not enabled in configuration\n";
135 print curr_time," updating Hyper Estraier:";
142 print " opening index $index_node_url";
143 if ($index_node_url) {
# Lazily create the node connection once and reuse it across calls.
144 $hest_node ||= Search::Estraier::Node->new($index_node_url);
145 $hest_node->set_auth('admin', 'admin');
146 print " via node URL";
148 die "don't know how to use Hyper Estraier Index $index_node_url";
# Restrict the SELECT below to a single backup when all ids were supplied.
157 if (defined($host_id) && defined($share_id) && defined($num)) {
164 @data = ( $host_id, $share_id, $num );
# Page through the result set EST_CHUNK rows at a time.
167 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
169 my $sth = $dbh->prepare(qq{
173 shares.name AS sname,
174 -- shares.share AS sharename,
175 files.backupnum AS backupnum,
176 -- files.name AS filename,
177 files.path AS filepath,
181 files.shareid AS shareid,
182 backups.date AS backup_date
184 INNER JOIN shares ON files.shareID=shares.ID
185 INNER JOIN hosts ON hosts.ID = shares.hostID
186 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
191 $sth->execute(@data);
192 $results = $sth->rows;
195 print " - no new files\n";
198 print " - $results files: ";
202 my $t = shift || return;
203 my $iso = BackupPC::Lib::timeStamp($t);
# Build one Estraier document per file row.
208 while (my $row = $sth->fetchrow_hashref()) {
210 my $fid = $row->{'fid'} || die "no fid?";
211 my $uri = 'file:///' . $fid;
# Skip rows already indexed (uri_to_id returns -1 when the URI is absent).
213 unless ($skip_check) {
214 my $id = ($hest_db || $hest_node)->uri_to_id($uri);
215 next unless ($id == -1);
218 # create a document object
219 my $doc = Search::Estraier::Document->new;
221 # add attributes to the document object
222 $doc->add_attr('@uri', $uri);
# Copy every selected column into a document attribute, skipping NULLs.
224 foreach my $c (@{ $sth->{NAME} }) {
225 print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
226 $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
229 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
231 # add the body text to the document object
232 my $path = $row->{'filepath'};
233 $doc->add_text($path);
# Hidden text with every character space-separated, so fragments of a
# filename are searchable as individual tokens.
234 $path =~ s/(.)/$1 /g;
235 $doc->add_hidden_text($path);
237 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
239 # register the document object to the database
241 $hest_node->put_doc($doc);
250 $offset += EST_CHUNK;
# A short (non-full) chunk means the result set has been drained.
252 } while ($results == EST_CHUNK);
254 my $dur = (time() - $t) || 1;
255 printf(" [%.2f/s dur: %s]\n",
# -i/-j without -c: run a standalone full-text index update pass.
265 if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
267 print "force update of Hyper Estraier index ";
268 print "by -i flag" if ($opt{i});
269 print "by -j flag" if ($opt{j});
# Create one index from a "table:column[:unique]" specification string;
# $unique interpolates as the literal keyword when the third field is set.
277 my $index = shift || return;
278 my ($table,$col,$unique) = split(/:/, $index);
281 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
282 $dbh->do(qq{ create $unique index $index on $table($col) });
# -c: create the search schema — hosts, shares, backups, files plus the
# archive bookkeeping tables, then indexes and sequences.
285 print "creating tables...\n";
289 ID SERIAL PRIMARY KEY,
290 name VARCHAR(30) NOT NULL,
294 create table shares (
295 ID SERIAL PRIMARY KEY,
296 hostID INTEGER NOT NULL references hosts(id),
297 name VARCHAR(30) NOT NULL,
298 share VARCHAR(200) NOT NULL
302 ID SERIAL PRIMARY KEY,
303 num INTEGER NOT NULL,
304 name VARCHAR(255) NOT NULL,
308 create table backups (
310 hostID INTEGER NOT NULL references hosts(id),
311 num INTEGER NOT NULL,
312 date integer NOT NULL,
313 type CHAR(4) not null,
314 shareID integer not null references shares(id),
315 size bigint not null,
316 inc_size bigint not null default -1,
317 inc_deleted boolean default false,
318 parts integer not null default 1,
324 shareID INTEGER NOT NULL references shares(id),
325 backupNum INTEGER NOT NULL,
326 name VARCHAR(255) NOT NULL,
327 path VARCHAR(255) NOT NULL,
328 date integer NOT NULL,
329 type INTEGER NOT NULL,
330 size bigint NOT NULL,
334 create table archive (
337 total_size bigint default -1,
339 username varchar(20) not null,
340 date timestamp default now(),
344 create table archive_backup (
345 archive_id int not null references archive(id) on delete cascade,
346 backup_id int not null references backups(id),
347 primary key(archive_id, backup_id)
350 create table archive_burned (
351 archive_id int references archive(id),
352 date timestamp default now(),
353 part int not null default 1,
354 copy int not null default 1,
355 iso_size bigint default -1
358 create table backup_parts (
360 backup_id int references backups(id),
361 part_nr int not null check (part_nr > 0),
362 tar_size bigint not null check (tar_size > 0),
363 size bigint not null check (size > 0),
365 items int not null check (items > 0),
366 date timestamp default now(),
371 print "creating indexes: ";
# Index specs use the "table:column[,column...][:unique]" form consumed
# by the index-creation helper defined earlier.
373 foreach my $index (qw(
386 archive_burned:archive_id
387 backup_parts:backup_id,part_nr
392 print " creating sequence: ";
# Standalone sequences required by the schema (currently only dvd_nr).
393 foreach my $seq (qw/dvd_nr/) {
395 $dbh->do( qq{ CREATE SEQUENCE $seq } );
405 ## delete data before inserting ##
# -d: wipe tables child-first (files ... hosts) so foreign-key references
# never point at already-deleted parent rows.
408 foreach my $table (qw(files dvds backups shares hosts)) {
410 $dbh->do(qq{ DELETE FROM $table });
417 ## insert new values ##
420 $hosts = $bpc->HostInfoRead();
# Prepared statements shared by the per-host import loop below.
426 $sth->{insert_hosts} = $dbh->prepare(qq{
427 INSERT INTO hosts (name, IP) VALUES (?,?)
430 $sth->{hosts_by_name} = $dbh->prepare(qq{
431 SELECT ID FROM hosts WHERE name=?
434 $sth->{backups_count} = $dbh->prepare(qq{
437 WHERE hostID=? AND num=? AND shareid=?
440 $sth->{insert_backups} = $dbh->prepare(qq{
441 INSERT INTO backups (hostID, num, date, type, shareid, size)
442 VALUES (?,?,?,?,?,-1)
445 $sth->{update_backups_size} = $dbh->prepare(qq{
446 UPDATE backups SET size = ?
447 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
450 $sth->{insert_files} = $dbh->prepare(qq{
452 (shareID, backupNum, name, path, date, type, size)
453 VALUES (?,?,?,?,?,?,?)
456 my @hosts = keys %{$hosts};
459 foreach my $host_key (@hosts) {
461 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# Look up the host row, inserting it on first sight.
463 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
465 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
466 $sth->{insert_hosts}->execute(
467 $hosts->{$host_key}->{'host'},
468 $hosts->{$host_key}->{'ip'}
471 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
475 print "host ", $hosts->{$host_key}->{'host'}, " [",
476 $host_nr, "/", ($#hosts + 1), "]: ";
478 # get backups for a host
479 my @backups = $bpc->BackupInfoRead($hostname);
480 my $incs = scalar @backups;
481 print "$incs increments\n";
# Walk each increment, honouring the -m cap on increments per host.
486 foreach my $backup (@backups) {
489 last if ($opt{m} && $inc_nr > $opt{m});
491 my $backupNum = $backup->{'num'};
492 my @backupShares = ();
494 printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
495 $hosts->{$host_key}->{'host'},
496 $inc_nr, $incs, $backupNum,
497 $backup->{type} || '?',
498 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
499 strftime($t_fmt,localtime($backup->{startTime})),
500 fmt_time($backup->{endTime} - $backup->{startTime})
503 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
504 foreach my $share ($files->shareList($backupNum)) {
508 $shareID = getShareID($share, $hostID, $hostname);
# One backups row per (host, backup, share); skip shares imported before.
510 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
511 my ($count) = $sth->{backups_count}->fetchrow_array();
512 # skip if already in database!
513 next if ($count > 0);
516 print curr_time," ", $share;
# Insert the backup row with size -1; the real size is filled in below.
518 $sth->{insert_backups}->execute(
521 $backup->{'endTime'},
522 substr($backup->{'type'},0,4),
# Recurse the share's tree, collecting file/dir counts and total bytes.
526 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
529 $sth->{update_backups_size}->execute(
533 $backup->{'endTime'},
534 substr($backup->{'type'},0,4),
545 my $dur = (time() - $t) || 1;
546 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
548 ($size / 1024 / 1024),
# Push the newly-inserted files to the full-text index, unless empty.
553 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
562 print "total duration: ",fmt_time(time() - $start_t),"\n";
# Return the shares.ID for ($hostID, $share), inserting a new row on miss.
# The inserted row also carries a "host/share" display name with duplicate
# slashes collapsed.
568 my ($share, $hostID, $hostname) = @_;
570 $sth->{share_id} ||= $dbh->prepare(qq{
571 SELECT ID FROM shares WHERE hostID=? AND name=?
574 $sth->{share_id}->execute($hostID,$share);
576 my ($id) = $sth->{share_id}->fetchrow_array();
578 return $id if (defined($id));
580 $sth->{insert_share} ||= $dbh->prepare(qq{
586 my $drop_down = $hostname . '/' . $share;
587 $drop_down =~ s#//+#/#g;
589 $sth->{insert_share}->execute($hostID,$share, $drop_down);
590 return $dbh->last_insert_id(undef,undef,'shares',undef);
# Check whether a file row already exists, matching the exact mtime plus
# both DST-shifted (+/- $dst_offset) variants; memoized per $key via
# %$beenThere. Inserts the row when no match was found.
598 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
600 return $beenThere->{$key} if (defined($beenThere->{$key}));
602 $sth->{file_in_db} ||= $dbh->prepare(qq{
604 WHERE shareID = ? and
607 ( date = ? or date = ? or date = ? )
611 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
612 $sth->{file_in_db}->execute(@param);
613 my $rows = $sth->{file_in_db}->rows;
614 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
616 $beenThere->{$key}++;
# Only insert when no matching row was found.
618 $sth->{'insert_files'}->execute(@data) unless ($rows);
622 ####################################################
623 # recursing through filesystem structure and #
624 # and returning flattened files list #
625 ####################################################
# NOTE(review): the prototype declares 8 scalar args but only 7 are
# unpacked (and 7 are passed at the recursive call) — verify; prototypes
# on named subs are best avoided entirely.
626 sub recurseDir($$$$$$$$) {
628 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
630 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# Totals returned to the caller: (files, new files, dirs, new dirs, bytes).
632 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
637 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
638 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
640 # first, add all the entries in current directory
641 foreach my $path_key (keys %{$filesInBackup}) {
642 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
647 $filesInBackup->{$path_key}->{'relPath'},
648 $filesInBackup->{$path_key}->{'mtime'},
649 $filesInBackup->{$path_key}->{'type'},
650 $filesInBackup->{$path_key}->{'size'}
# Three dedup keys: exact mtime plus the +/- 1h DST-shifted variants,
# so a daylight-saving clock change does not re-import identical files.
653 my $key = join(" ", (
657 $filesInBackup->{$path_key}->{'mtime'},
658 $filesInBackup->{$path_key}->{'size'}
661 my $key_dst_prev = join(" ", (
665 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
666 $filesInBackup->{$path_key}->{'size'}
669 my $key_dst_next = join(" ", (
673 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
674 $filesInBackup->{$path_key}->{'size'}
# Entry counts as new only if none of the three keys was seen yet and
# found_in_db reports no existing row (it also inserts when missing).
679 ! defined($beenThere->{$key}) &&
680 ! defined($beenThere->{$key_dst_prev}) &&
681 ! defined($beenThere->{$key_dst_next}) &&
682 ! ($found = found_in_db($key, @data))
684 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
686 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
687 $new_dirs++ unless ($found);
688 print STDERR " dir\n" if ($debug >= 2);
690 $new_files++ unless ($found);
691 print STDERR " file\n" if ($debug >= 2);
693 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Subdirectories are queued on a stack and recursed after this level,
# limiting the call depth to one level of recursion per directory level.
696 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
699 my $full_path = $dir . '/' . $path_key;
700 push @stack, $full_path;
701 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
703 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
715 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Drain the queue, recursing into one subdirectory at a time.
717 while ( my $dir = shift @stack ) {
718 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
719 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
728 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);