#!/usr/local/bin/perl -w
use lib "__INSTALLDIR__/lib";
use DBI;
use BackupPC::Lib;
use BackupPC::View;
use Data::Dumper;
use Getopt::Std;
use Time::HiRes qw/time/;
use File::Pid;
use POSIX qw/strftime/;
use BackupPC::SearchLib;
use Cwd qw/abs_path/;

use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK => 100000;

# one hour offset used to tolerate daylight saving time changes
my $dst_offset = 60 * 60;
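
# keep a pid file under /tmp (named after this script's absolute path) so that
# only one instance of the import can run at a time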
my $pid_path = abs_path($0);
$pid_path =~ s/\W+/_/g;

my $pidfile = File::Pid->new({
    file => "/tmp/$pid_path",
});

if (my $pid = $pidfile->running) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    $pidfile = File::Pid->new;
}

print STDERR "$0 using pid ", $pidfile->pid, " file ", $pidfile->file, "\n";
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $bpc = BackupPC::Lib->new || die "can't create BackupPC::Lib\n";
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);
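
# connect to the search database; AutoCommit is off, so changes are committed explicitly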
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

if ( !getopts("cdm:v:ijf", \%opt ) ) {

usage: $0 [-c|-d] [-m num] [-v level] [-i|-j|-f]

    -c      create database on first use
    -d      delete database before import
    -m num  import just num increments for one host
    -v num  set verbosity (debug) level (default $debug)
    -i      update Hyper Estraier full text index
    -j      update full text index, don't check existing files
    -f      don't do anything with the full text index

Option -j is a variation of -i: it allows faster initial creation
of the full-text index from an existing database.

Option -f will create a database which is out of sync with the full-text
index. You will have to re-run $0 with -i to fix it.

print "Debug level at $opt{v}\n";

print "WARNING: disabling full-text index update. You need to re-run $0 -j!\n";
($use_hest, $index_path, $index_node_url) = (undef, undef, undef);
    my $t = shift || return;

    my ($ss, $mm, $hh) = gmtime($t);
    $out .= "${hh}h" if ($hh);
    $out .= sprintf("%02d:%02d", $mm, $ss);

    return strftime($t_fmt, localtime());

    print "\nCaught a SIG$sig--syncing database and shutting down\n";

$SIG{'INT'}  = \&signal;
$SIG{'QUIT'} = \&signal;
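
# hest_update: push newly inserted file rows into the Hyper Estraier full-text
# index in chunks of EST_CHUNK rows; the optional arguments restrict the update
# to a single host, share and backup number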
    my ($host_id, $share_id, $num) = @_;

    my $skip_check = $opt{j} &&
        print STDERR "Skipping check for existing files -- this should be used only with initial import\n";

    unless (defined($use_hest)) {
        print STDERR "HyperEstraier support not enabled in configuration\n";
    }

    return unless ($use_hest);

    print curr_time, " updating HyperEstraier:";

    print " opening index $use_hest";
    if ($index_path) {
        $hest_db = HyperEstraier::Database->new();
        $hest_db->open($TopDir . $index_path,
            $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
    } elsif ($index_node_url) {
        $hest_node ||= HyperEstraier::Node->new($index_node_url);
        $hest_node->set_auth('admin', 'admin');
        print " via node URL";
    } else {
        die "don't know how to use HyperEstraier Index $use_hest";
    }

    print " increment is " . EST_CHUNK . " files:";
    if (defined($host_id) && defined($share_id) && defined($num)) {
        @data = ( $host_id, $share_id, $num );
    }

        my $limit = sprintf('LIMIT ' . EST_CHUNK . ' OFFSET %d', $offset);

        my $sth = $dbh->prepare(qq{
            SELECT
                shares.name AS sname,
                -- shares.share AS sharename,
                files.backupnum AS backupnum,
                -- files.name AS filename,
                files.path AS filepath,
                files.shareid AS shareid,
                backups.date AS backup_date
            FROM files
                INNER JOIN shares  ON files.shareID = shares.ID
                INNER JOIN hosts   ON hosts.ID = shares.hostID
                INNER JOIN backups ON backups.num = files.backupNum AND backups.hostID = hosts.ID AND backups.shareID = shares.ID

        $sth->execute(@data);
        $results = $sth->rows;

        print " - no new files\n";
        my $t = shift || return;
        my $iso = BackupPC::Lib::timeStamp($t);

        while (my $row = $sth->fetchrow_hashref()) {

            my $fid = $row->{'fid'} || die "no fid?";
            my $uri = 'file:///' . $fid;

            unless ($skip_check) {
                my $id = ($hest_db || $hest_node)->uri_to_id($uri);
                next unless ($id == -1);
            }

            # create a document object
            my $doc = HyperEstraier::Document->new;

            # add attributes to the document object
            $doc->add_attr('@uri', $uri);

            foreach my $c (@{ $sth->{NAME} }) {
                $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
            }

            #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

            # add the body text to the document object
            my $path = $row->{'filepath'};
            $doc->add_text($path);
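
            # the path is added a second time as hidden text with every character
            # separated by a space, so searches for partial path fragments can match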
            $path =~ s/(.)/$1 /g;
            $doc->add_hidden_text($path);

            print STDERR $doc->dump_draft, "\n" if ($debug > 1);

            # register the document object to the database
            if ($hest_db) {
                $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
            } elsif ($hest_node) {
                $hest_node->put_doc($doc);
            }
        }

        $hest_db->sync() if ($index_path);

        $offset += EST_CHUNK;

    } while ($results == EST_CHUNK);

    my $dur = (time() - $t) || 1;
    printf(" [%.2f/s dur: %s]\n",
if (($opt{i} || $opt{j} || ($index_path && ! -e $TopDir . $index_path)) && !$opt{c}) {
    print "force update of HyperEstraier index ";
    print "importing existing data" unless (-e $TopDir . $index_path);
    print "by -i flag" if ($opt{i});
    print "by -j flag" if ($opt{j});
    my $index = shift || return;
    my ($table, $col, $unique) = split(/:/, $index);
    $unique ||= '';
    $index =~ s/\W+/_/g;    # turn the table:column spec into a usable index name
    print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
    $dbh->do(qq{ create $unique index $index on $table($col) });
299 print "creating tables...\n";
303 ID SERIAL PRIMARY KEY,
304 name VARCHAR(30) NOT NULL,
308 create table shares (
309 ID SERIAL PRIMARY KEY,
310 hostID INTEGER NOT NULL references hosts(id),
311 name VARCHAR(30) NOT NULL,
312 share VARCHAR(200) NOT NULL
316 ID SERIAL PRIMARY KEY,
317 num INTEGER NOT NULL,
318 name VARCHAR(255) NOT NULL,
322 create table backups (
324 hostID INTEGER NOT NULL references hosts(id),
325 num INTEGER NOT NULL,
326 date integer NOT NULL,
327 type CHAR(4) not null,
328 shareID integer not null references shares(id),
329 size bigint not null,
330 inc_size bigint not null default -1,
331 inc_deleted boolean default false,
332 parts integer not null default 1,
338 shareID INTEGER NOT NULL references shares(id),
339 backupNum INTEGER NOT NULL,
340 name VARCHAR(255) NOT NULL,
341 path VARCHAR(255) NOT NULL,
342 date integer NOT NULL,
343 type INTEGER NOT NULL,
344 size bigint NOT NULL,
348 create table archive (
351 total_size bigint default -1,
353 username varchar(20) not null,
354 date timestamp default now(),
358 create table archive_backup (
359 archive_id int not null references archive(id) on delete cascade,
360 backup_id int not null references backups(id),
361 primary key(archive_id, backup_id)
364 create table archive_burned (
365 archive_id int references archive(id),
366 date timestamp default now(),
367 part int not null default 1,
368 copy int not null default 1,
369 iso_size bigint default -1
372 create table backup_parts (
374 backup_id int references backups(id),
375 part_nr int not null check (part_nr > 0),
376 tar_size bigint not null check (tar_size > 0),
377 size bigint not null check (size > 0),
379 items int not null check (items > 0),
380 date timestamp default now(),
385 print "creating indexes: ";
387 foreach my $index (qw(
400 archive_burned:archive_id
401 backup_parts:backup_id,part_nr
406 print " creating sequence: ";
407 foreach my $seq (qw/dvd_nr/) {
409 $dbh->do( qq{ CREATE SEQUENCE $seq } );
## delete data before inserting ##
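# tables are emptied child-first so foreign key references are never violated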
foreach my $table (qw(files dvds backups shares hosts)) {
    $dbh->do(qq{ DELETE FROM $table });
}

## insert new values ##

$hosts = $bpc->HostInfoRead();
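
# prepared statements reused by the import loops below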
$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?, ?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name = ?
});

$sth->{backups_count} = $dbh->prepare(qq{
    SELECT COUNT(*) FROM backups
    WHERE hostID = ? AND num = ? AND shareid = ?
});

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?, ?, ?, ?, ?, -1)
});

$sth->{update_backups_size} = $dbh->prepare(qq{
    UPDATE backups SET size = ?
    WHERE hostID = ? AND num = ? AND date = ? AND type = ? AND shareid = ?
});

$sth->{insert_files} = $dbh->prepare(qq{
    INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
    VALUES (?, ?, ?, ?, ?, ?, ?)
});
my @hosts = keys %{$hosts};
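
# walk every known host: make sure it has a row in hosts, then import its backups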
foreach my $host_key (@hosts) {

    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hosts->{$host_key}->{'host'},
            $hosts->{$host_key}->{'ip'}
        );
        $hostID = $dbh->last_insert_id(undef, undef, 'hosts', undef);
    }

    print "host ", $hosts->{$host_key}->{'host'}, " [",
        $host_nr, "/", ($#hosts + 1), "]: ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";
    foreach my $backup (@backups) {

        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hosts->{$host_key}->{'host'},
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt, localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        );

        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);

        foreach my $share ($files->shareList($backupNum)) {

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);

            print curr_time, " ", $share;

            $sth->{insert_backups}->execute(
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'}, 0, 4),
                $shareID
            );

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
            $sth->{update_backups_size}->execute(
                $size,
                $hostID,
                $backupNum,
                $backup->{'endTime'},
                substr($backup->{'type'}, 0, 4),
                $shareID
            );

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                ($size / 1024 / 1024),

            hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
576 print "total duration: ",fmt_time(time() - $start_t),"\n";
    my ($share, $hostID, $hostname) = @_;

    $sth->{share_id} ||= $dbh->prepare(qq{
        SELECT ID FROM shares WHERE hostID = ? AND name = ?
    });

    $sth->{share_id}->execute($hostID, $share);

    my ($id) = $sth->{share_id}->fetchrow_array();

    return $id if (defined($id));

    $sth->{insert_share} ||= $dbh->prepare(qq{
        INSERT INTO shares (hostID, name, share) VALUES (?, ?, ?)
    });

    my $drop_down = $hostname . '/' . $share;
    $drop_down =~ s#//+#/#g;

    $sth->{insert_share}->execute($hostID, $share, $drop_down);
    return $dbh->last_insert_id(undef, undef, 'shares', undef);
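
# found_in_db: return true if a file with the same share, path, size and date
# (allowing the date to differ by exactly one DST hour) is already recorded;
# otherwise insert it and remember its key in $beenThere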
    my ($key, $shareID, undef, $name, $path, $date, undef, $size) = @_;

    return $beenThere->{$key} if (defined($beenThere->{$key}));

    $sth->{file_in_db} ||= $dbh->prepare(qq{
        SELECT 1 FROM files
        WHERE shareID = ? and
            path = ? and
            size = ? and
            ( date = ? or date = ? or date = ? )
    });

    my @param = ($shareID, $path, $size, $date, $date - $dst_offset, $date + $dst_offset);
    $sth->{file_in_db}->execute(@param);
    my $rows = $sth->{file_in_db}->rows;

    print STDERR "## found_in_db($shareID,$path,$date,$size) ", ( $rows ? '+' : '-' ), join(" ", @param), "\n" if ($debug >= 3);

    $beenThere->{$key}++;

    $sth->{'insert_files'}->execute(@data) unless ($rows);
####################################################
# recurse through the filesystem structure and     #
# return a flattened list of its files             #
####################################################
sub recurseDir($$$$$$$) {
    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0, 0, 0, 0, 0);

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in the current directory
    foreach my $path_key (keys %{$filesInBackup}) {

        print STDERR "# file ", Dumper($filesInBackup->{$path_key}), "\n" if ($debug >= 3);

        my @data = (
            $shareID,
            $backupNum,
            $path_key,
            $filesInBackup->{$path_key}->{'relPath'},
            $filesInBackup->{$path_key}->{'mtime'},
            $filesInBackup->{$path_key}->{'type'},
            $filesInBackup->{$path_key}->{'size'}
        );
        my $key = join(" ", (
            $filesInBackup->{$path_key}->{'mtime'},
            $filesInBackup->{$path_key}->{'size'}

        # the same file can reappear with its mtime shifted by one hour around a
        # daylight saving time change, so keys one hour earlier and later are checked too
        my $key_dst_prev = join(" ", (
            $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
            $filesInBackup->{$path_key}->{'size'}

        my $key_dst_next = join(" ", (
            $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
            $filesInBackup->{$path_key}->{'size'}

            ! defined($beenThere->{$key}) &&
            ! defined($beenThere->{$key_dst_prev}) &&
            ! defined($beenThere->{$key_dst_next}) &&
            ! ($found = found_in_db($key, @data))

            print STDERR "# key: $key [", $beenThere->{$key}, "]" if ($debug >= 2);
            if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                $new_dirs++ unless ($found);
                print STDERR " dir\n" if ($debug >= 2);
            } else {
                $new_files++ unless ($found);
                print STDERR " file\n" if ($debug >= 2);
            }
            $size += $filesInBackup->{$path_key}->{'size'} || 0;

        if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);

            # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});

    print STDERR "## STACK ", join(", ", @stack), "\n" if ($debug >= 2);
    while ( my $dir = shift @stack ) {
        my ($f, $nf, $d, $nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
        print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
        $nr_files += $f; $new_files += $nf;
        $nr_dirs  += $d; $new_dirs  += $nd;
        $size += $s;
    }
    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);