1 #!/usr/local/bin/perl -w
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
16 use constant BPC_FTYPE_DIR => 5;
17 use constant EST_CHUNK => 100000;
# --- single-instance guard --------------------------------------------------
# Refuse to start when another copy already holds the pid file (File::Pid).
# NOTE(review): "new File::Pid" is indirect-object syntax; prefer
# File::Pid->new.  Several lines of the elsif branch are elided in this
# excerpt -- confirm against the full file before changing.
24 my $pidfile = new File::Pid;
26 if (my $pid = $pidfile->running ) {
27 die "$0 already running: $pid\n";
28 } elsif ($pidfile->pid ne $$) {
30 $pidfile = new File::Pid;
33 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# --- configuration & database handle ----------------------------------------
# Read BackupPC configuration; SearchDSN is mandatory, SearchUser optional.
35 my $t_fmt = '%Y-%m-%d %H:%M:%S';
38 my $bpc = BackupPC::Lib->new || die;
39 my %Conf = $bpc->Conf();
40 my $TopDir = $bpc->TopDir();
43 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
44 my $user = $Conf{SearchUser} || '';
# HyperEstraier full-text index: resolved to either a local index path or a
# remote node URL by SearchLib.
46 my $use_hest = $Conf{HyperEstraierIndex};
47 my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);
# AutoCommit is off; commits presumably happen in elided code -- verify.
49 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# --- command-line options ---------------------------------------------------
# -c create schema, -d wipe data first, -m limit increments per host,
# -v verbosity, -i force full-text reindex.
# NOTE(review): the usage text below appears to sit inside a die/heredoc
# whose delimiters are elided in this excerpt, so no comments are
# interleaved with it.
53 if ( !getopts("cdm:v:i", \%opt ) ) {
55 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]
58 -c create database on first use
59 -d delete database before import
60 -m num import just num increments for one host
61 -v num set verbosity (debug) level (default $debug)
62 -i update HyperEstraier full text index
68 print "Debug level at $opt{v}\n";
# --- time-formatting helpers (partial view) ---------------------------------
# Duration formatter: seconds -> optional "<h>h" prefix plus "MM:SS";
# the sub header and the line initializing $out are elided.
# Below it, a current-time helper formats localtime with $t_fmt.
75 my $t = shift || return;
77 my ($ss,$mm,$hh) = gmtime($t);
78 $out .= "${hh}h" if ($hh);
79 $out .= sprintf("%02d:%02d", $mm,$ss);
84 return strftime($t_fmt,localtime());
# --- signal handling (partial view) -----------------------------------------
# INT/QUIT share one handler; per its message it presumably syncs the
# database before exit, but the handler body is mostly elided -- confirm.
93 print "\nCaught a SIG$sig--syncing database and shutting down\n";
100 $SIG{'INT'} = \&signal;
101 $SIG{'QUIT'} = \&signal;
# --- hest_update (partial view) ---------------------------------------------
# Pushes file rows from the SQL database into the HyperEstraier full-text
# index, EST_CHUNK rows at a time, targeting either a local index
# ($index_path) or a remote node ($index_node_url).  Many lines are elided;
# comments are added only where the surrounding lines are clearly Perl code
# (never inside the qq{} SQL string).
105 my ($host_id, $share_id, $num) = @_;
108 print STDERR "HyperEstraier support not enabled in configuration\n";
112 print curr_time," updating HyperEstraier:";
119 print " opening index $use_hest";
121 $hest_db = HyperEstraier::Database->new();
122 $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
124 } elsif ($index_node_url) {
125 $hest_node ||= HyperEstraier::Node->new($index_node_url);
126 $hest_node->set_auth('admin', 'admin');
127 print " via node URL";
129 die "don't know how to use HyperEstraier Index $use_hest";
131 print " increment is " . EST_CHUNK . " files:";
139 if ($host_id && $share_id && $num) {
146 @data = ( $host_id, $share_id, $num );
# Paginate with LIMIT/OFFSET so memory use stays bounded per chunk.
149 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
151 my $sth = $dbh->prepare(qq{
155 shares.name AS sname,
156 -- shares.share AS sharename,
157 files.backupnum AS backupnum,
158 -- files.name AS filename,
159 files.path AS filepath,
163 files.shareid AS shareid,
164 backups.date AS backup_date
166 INNER JOIN shares ON files.shareID=shares.ID
167 INNER JOIN hosts ON hosts.ID = shares.hostID
168 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
173 $sth->execute(@data);
174 $results = $sth->rows;
177 print " - no new files\n";
182 my $t = shift || return;
183 my $iso = BackupPC::Lib::timeStamp($t);
188 while (my $row = $sth->fetchrow_hashref()) {
190 my $fid = $row->{'fid'} || die "no fid?";
191 my $uri = 'file:///' . $fid;
# Skip documents already indexed; uri_to_id presumably returns -1 for
# unknown URIs -- confirm against HyperEstraier API.
193 my $id = ($hest_db || $hest_node)->uri_to_id($uri);
194 next unless ($id == -1);
196 # create a document object
197 my $doc = HyperEstraier::Document->new;
199 # add attributes to the document object
200 $doc->add_attr('@uri', $uri);
202 foreach my $c (@{ $sth->{NAME} }) {
203 $doc->add_attr($c, $row->{$c}) if ($row->{$c});
206 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
208 # add the body text to the document object
209 my $path = $row->{'filepath'};
210 $doc->add_text($path);
# Insert a space after every character -- likely so partial path
# fragments match as hidden text; verify intent.
211 $path =~ s/(.)/$1 /g;
212 $doc->add_hidden_text($path);
214 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
216 # register the document object to the database
218 $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
219 } elsif ($hest_node) {
220 $hest_node->put_doc($doc);
228 $hest_db->sync() if ($index_path);
230 $offset += EST_CHUNK;
# Fetch another chunk only when the last one was full (more rows may remain).
232 } while ($results == EST_CHUNK);
239 my $dur = (time() - $t) || 1;
240 printf(" [%.2f/s dur: %s]\n",
# --- decide whether to force a full-text reindex ----------------------------
# Reindex when -i was given, or when a local index path is configured but
# the index file does not yet exist -- unless -c (create) is in effect.
250 if (($opt{i} || ($index_path && ! -e $index_path)) && !$opt{c}) {
252 print "force update of HyperEstraier index ";
253 print "importing existing data" unless (-e $index_path);
254 print "by -i flag" if ($opt{i});
# --- do_index (partial view) ------------------------------------------------
# Creates an index whose name encodes its definition: "<table>_<col>" or
# "<table>_<col>_unique", split on underscores.
# NOTE(review): a column name containing "_" would break this split, and
# $unique/$index are interpolated straight into DDL -- callers must pass
# trusted names only.
262 my $index = shift || return;
263 my ($table,$col,$unique) = split(/_/, $index);
266 $dbh->do(qq{ create $unique index $index on $table($col) });
# --- schema creation and reset (partial view) -------------------------------
# Builds the search schema -- hosts, shares, backups, files, plus
# archive/workflow bookkeeping tables and their foreign keys -- then
# creates indexes and (under -d, presumably) deletes existing rows.
# Most lines below are the *contents* of $dbh->do(qq{ ... }) string
# literals whose delimiters are elided in this excerpt, so no comments
# are interleaved with the DDL text.
269 print "creating tables...\n";
273 ID SERIAL PRIMARY KEY,
274 name VARCHAR(30) NOT NULL,
280 create table shares (
281 ID SERIAL PRIMARY KEY,
282 hostID INTEGER NOT NULL references hosts(id),
283 name VARCHAR(30) NOT NULL,
284 share VARCHAR(200) NOT NULL,
285 localpath VARCHAR(200)
291 ID SERIAL PRIMARY KEY,
292 num INTEGER NOT NULL,
293 name VARCHAR(255) NOT NULL,
299 create table backups (
300 hostID INTEGER NOT NULL references hosts(id),
301 num INTEGER NOT NULL,
302 date integer NOT NULL,
303 type CHAR(4) not null,
304 shareID integer not null references shares(id),
305 size integer not null,
306 PRIMARY KEY(hostID, num, shareID)
310 #do_index('backups_hostid,num_unique');
315 ID SERIAL PRIMARY KEY,
316 shareID INTEGER NOT NULL references shares(id),
317 backupNum INTEGER NOT NULL,
318 name VARCHAR(255) NOT NULL,
319 path VARCHAR(255) NOT NULL,
320 date integer NOT NULL,
321 type INTEGER NOT NULL,
322 size INTEGER NOT NULL
333 username varchar(20) not null,
341 create table archive_backup
343 archive_id int not null,
344 backup_id int not null,
346 primary key(archive_id, backup_id)
351 create table workflows(
353 step_id int not null,
356 username varchar(20),
357 archive_id int not null,
358 running boolean default true,
364 create table workflow_step
366 step_id int not null,
369 stop boolean default false,
375 alter table workflow_step
376 add constraint fk_workflow_next_step
377 foreign key(next_step)
378 references workflow_step(step_id);
382 alter table workflows
383 add constraint fk_workflows_step_id
385 references workflow_step(step_id);
389 alter table workflows
390 add constraint fk_workflows_archive_id
391 foreign key(archive_id)
392 references archive(id);
396 create table workflow_log
398 workflow_id int not null,
399 step_id int not null,
400 date timestamp not null,
402 primary key(workflow_id, step_id)
407 alter table workflow_log
408 add constraint fk_workflow_log_workflow_id
409 foreign key (workflow_id)
410 references workflows(id);
414 alter table workflow_log
415 add constraint fk_workflow_log_step_id
416 foreign key (step_id)
417 references workflow_step(step_id);
420 print "creating indexes:";
422 foreach my $index (qw(
443 ## delete data before inserting ##
# Children before parents so FK constraints are not violated.
446 foreach my $table (qw(files dvds backups shares hosts)) {
448 $dbh->do(qq{ DELETE FROM $table });
# --- main import loop (partial view) ----------------------------------------
# Walks every BackupPC host, each of its backup increments and shares:
# inserts missing hosts/backups rows, recurses the share tree to insert
# files, then calls hest_update for backups that contributed new entries.
# Gaps in the original numbering mark elided lines; comments are added
# only at clearly Perl-code positions (never inside qq{} SQL strings).
455 ## insert new values ##
458 $hosts = $bpc->HostInfoRead();
# Prepared statements are cached in %$sth and reused across all hosts.
464 $sth->{insert_hosts} = $dbh->prepare(qq{
465 INSERT INTO hosts (name, IP) VALUES (?,?)
468 $sth->{hosts_by_name} = $dbh->prepare(qq{
469 SELECT ID FROM hosts WHERE name=?
472 $sth->{backups_count} = $dbh->prepare(qq{
475 WHERE hostID=? AND num=? AND shareid=?
478 $sth->{insert_backups} = $dbh->prepare(qq{
479 INSERT INTO backups (hostID, num, date, type, shareid, size)
483 $sth->{insert_files} = $dbh->prepare(qq{
485 (shareID, backupNum, name, path, date, type, size)
486 VALUES (?,?,?,?,?,?,?)
489 foreach my $host_key (keys %{$hosts}) {
491 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
493 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
# Insert the host only when it is not already in the database.
495 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
496 $sth->{insert_hosts}->execute(
497 $hosts->{$host_key}->{'host'},
498 $hosts->{$host_key}->{'ip'}
501 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
504 print "host ".$hosts->{$host_key}->{'host'}.": ";
506 # get backups for a host
507 my @backups = $bpc->BackupInfoRead($hostname);
508 my $incs = scalar @backups;
509 print "$incs increments\n";
514 foreach my $backup (@backups) {
# -m caps how many increments are imported for this host.
517 last if ($opt{m} && $inc_nr > $opt{m});
519 my $backupNum = $backup->{'num'};
520 my @backupShares = ();
522 printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
523 $hosts->{$host_key}->{'host'},
524 $inc_nr, $incs, $backupNum,
525 $backup->{type} || '?',
526 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
527 strftime($t_fmt,localtime($backup->{startTime})),
528 fmt_time($backup->{endTime} - $backup->{startTime})
531 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
532 foreach my $share ($files->shareList($backupNum)) {
536 $shareID = getShareID($share, $hostID, $hostname);
538 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
539 my ($count) = $sth->{backups_count}->fetchrow_array();
540 # skip if already in database!
541 next if ($count > 0);
544 print curr_time," ", $share;
546 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
548 $sth->{insert_backups}->execute(
551 $backup->{'endTime'},
552 substr($backup->{'type'},0,4),
560 my $dur = (time() - $t) || 1;
561 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
563 ($size / 1024 / 1024),
# Only touch the full-text index when this backup added new files or dirs.
568 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
577 print "total duration: ",fmt_time(time() - $start_t),"\n";
# --- getShareID (partial view) ----------------------------------------------
# Returns shares.ID for ($hostID, $share); when absent, inserts a new row
# whose "share" column is "hostname/share" with duplicate slashes collapsed,
# and returns the freshly assigned ID.
583 my ($share, $hostID, $hostname) = @_;
585 $sth->{share_id} ||= $dbh->prepare(qq{
586 SELECT ID FROM shares WHERE hostID=? AND name=?
589 $sth->{share_id}->execute($hostID,$share);
591 my ($id) = $sth->{share_id}->fetchrow_array();
# Fast path: the share already exists.
593 return $id if (defined($id));
595 $sth->{insert_share} ||= $dbh->prepare(qq{
597 (hostID,name,share,localpath)
601 my $drop_down = $hostname . '/' . $share;
602 $drop_down =~ s#//+#/#g;
604 $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
605 return $dbh->last_insert_id(undef,undef,'shares',undef);
# --- found_in_db (partial view) ---------------------------------------------
# Memoized (via %$beenThere) existence check for a file row keyed on
# (shareID, path, date, size); inserts the row when the SELECT finds
# nothing.  NOTE(review): @data used at the bottom is populated in lines
# elided from this excerpt -- verify against the full file.
613 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
615 return $beenThere->{$key} if (defined($beenThere->{$key}));
617 $sth->{file_in_db} ||= $dbh->prepare(qq{
619 WHERE shareID = ? and
626 my @param = ($shareID,$path,$date,$size);
627 $sth->{file_in_db}->execute(@param);
628 my $rows = $sth->{file_in_db}->rows;
629 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
631 $beenThere->{$key}++;
# Insert only when the SELECT matched no existing row.
633 $sth->{'insert_files'}->execute(@data) unless ($rows);
637 ####################################################
638 # recursing through filesystem structure and #
639 # and returning flattened files list #
640 ####################################################
# Returns (nr_files, new_files, nr_dirs, new_dirs, size_in_bytes) for one
# directory of one share/backup, inserting unseen files via found_in_db.
# NOTE(review): the ($$$$$$$$) prototype declares 8 scalars but only 7
# args are unpacked below; Perl prototypes do not validate arguments and
# are best removed -- confirm call sites before changing.
641 sub recurseDir($$$$$$$$) {
643 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
645 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# Counters: total files, newly-inserted files, total dirs, new dirs, bytes.
647 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
652 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
653 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
655 # first, add all the entries in current directory
656 foreach my $path_key (keys %{$filesInBackup}) {
657 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
662 $filesInBackup->{$path_key}->{'relPath'},
663 $filesInBackup->{$path_key}->{'mtime'},
664 $filesInBackup->{$path_key}->{'type'},
665 $filesInBackup->{$path_key}->{'size'}
# De-duplication key built from entry attributes plus mtime and size
# (some key components are in elided lines).
668 my $key = join(" ", (
672 $filesInBackup->{$path_key}->{'mtime'},
673 $filesInBackup->{$path_key}->{'size'}
677 if (! defined($beenThere->{$key}) && ! ($found = found_in_db($key, @data)) ) {
678 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
680 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
681 $new_dirs++ unless ($found);
682 print STDERR " dir\n" if ($debug >= 2);
684 $new_files++ unless ($found);
685 print STDERR " file\n" if ($debug >= 2);
687 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Subdirectories are queued on @stack and recursed after this directory's
# entries are counted, rather than recursing immediately per entry.
690 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
693 my $full_path = $dir . '/' . $path_key;
694 push @stack, $full_path;
695 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
697 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
709 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
711 while ( my $dir = shift @stack ) {
712 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
713 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
722 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);