1 #!/usr/local/bin/perl -w
# NOTE(review): this file is a partial/sampled excerpt -- many original lines
# are missing and each line carries a stray leading number from extraction.
# Edits below add commentary only; no executable bytes are changed.
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
# File type constant matching BackupPC's attrib "type" field for directories.
16 use constant BPC_FTYPE_DIR => 5;
# Batch size for paging rows into the full-text index (see hest_update below).
17 use constant EST_CHUNK => 100000;
# Single-instance guard: refuse to start if the pidfile says a copy is running.
24 my $pidfile = new File::Pid;
26 if (my $pid = $pidfile->running ) {
27 die "$0 already running: $pid\n";
28 } elsif ($pidfile->pid ne $$) {
30 $pidfile = new File::Pid;
33 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# Timestamp format used by curr_time()/strftime throughout the run log.
35 my $t_fmt = '%Y-%m-%d %H:%M:%S';
38 my $bpc = BackupPC::Lib->new || die;
39 my %Conf = $bpc->Conf();
40 my $TopDir = $bpc->TopDir();
# Search-database DSN/user come from BackupPC's config.pl; DSN is mandatory.
43 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
44 my $user = $Conf{SearchUser} || '';
# Optional Hyper Estraier full-text index: SearchLib resolves the config value
# into either a local index path or a remote node URL (only one is set).
46 my $use_hest = $Conf{HyperEstraierIndex};
47 my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);
# AutoCommit off: the importer commits explicitly so an interrupted run can
# roll back; RaiseError turns DBI failures into exceptions.
49 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line switches; on bad usage the (partially elided) block below
# emits this help text.  No comments are inserted inside the usage string.
53 if ( !getopts("cdm:v:ij", \%opt ) ) {
55 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]
58 -c create database on first use
59 -d delete database before import
60 -m num import just num increments for one host
61 -v num set verbosity (debug) level (default $debug)
62 -i update Hyper Estraier full text index
63 -j update full text, don't check existing files
65 Option -j is variation on -i. It will allow faster initial creation
66 of full-text index from existing database.
73 print "Debug level at $opt{v}\n";
# Fragment of fmt_time(): renders a duration in seconds as "[Hh]MM:SS".
# (The sub declaration line is outside this excerpt.)
80 my $t = shift || return;
# gmtime on a small duration yields (sec, min, hour, ...) directly.
82 my ($ss,$mm,$hh) = gmtime($t);
83 $out .= "${hh}h" if ($hh);
84 $out .= sprintf("%02d:%02d", $mm,$ss);
# Fragment of curr_time(): current wall-clock time formatted with $t_fmt.
89 return strftime($t_fmt,localtime());
# Fragment of the signal handler: announce shutdown so the operator knows the
# database/index are being synced before exit (sync code elided here).
98 print "\nCaught a SIG$sig--syncing database and shutting down\n"
# Install the handler for interactive interrupts and terminal quits.
105 $SIG{'INT'} = \&signal;
106 $SIG{'QUIT'} = \&signal;
# Fragment of hest_update($host_id, $share_id, $num): pushes file rows from
# the SQL database into the Hyper Estraier full-text index, EST_CHUNK rows at
# a time.  With no arguments it indexes everything; with all three it indexes
# only that (host, share, backup-number) triple.
110 my ($host_id, $share_id, $num) = @_;
# -j skips the per-URI "already indexed?" check; only safe on an empty index.
112 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
115 print STDERR "HyperEstraier support not enabled in configuration\n";
119 print curr_time," updating HyperEstraier:";
# Open either a local on-disk index (writer mode, create if absent) ...
126 print " opening index $use_hest";
128 $hest_db = HyperEstraier::Database->new();
129 $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
# ... or a remote estmaster node reached over HTTP.
131 } elsif ($index_node_url) {
132 $hest_node ||= HyperEstraier::Node->new($index_node_url);
# NOTE(review): hard-coded admin/admin credentials -- these should come from
# config.pl; confirm against the full source before changing.
133 $hest_node->set_auth('admin', 'admin');
134 print " via node URL";
136 die "don't know how to use HyperEstraier Index $use_hest";
138 print " increment is " . EST_CHUNK . " files:";
# Narrow the query to one backup when a specific triple was requested.
146 if ($host_id && $share_id && $num) {
153 @data = ( $host_id, $share_id, $num );
# Page through results with LIMIT/OFFSET so memory use stays bounded.
156 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
158 my $sth = $dbh->prepare(qq{
162 shares.name AS sname,
163 -- shares.share AS sharename,
164 files.backupnum AS backupnum,
165 -- files.name AS filename,
166 files.path AS filepath,
170 files.shareid AS shareid,
171 backups.date AS backup_date
173 INNER JOIN shares ON files.shareID=shares.ID
174 INNER JOIN hosts ON hosts.ID = shares.hostID
175 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
180 $sth->execute(@data);
181 $results = $sth->rows;
184 print " - no new files\n";
# Local helper fragment: ISO-style timestamp for document date attributes.
189 my $t = shift || return;
190 my $iso = BackupPC::Lib::timeStamp($t);
195 while (my $row = $sth->fetchrow_hashref()) {
197 my $fid = $row->{'fid'} || die "no fid?";
# Each file row becomes one document keyed by a synthetic file:///<fid> URI.
198 my $uri = 'file:///' . $fid;
200 unless ($skip_check) {
# uri_to_id returns -1 when the URI is not in the index yet; skip known docs.
201 my $id = ($hest_db || $hest_node)->uri_to_id($uri);
202 next unless ($id == -1);
205 # create a document object
206 my $doc = HyperEstraier::Document->new;
208 # add attributes to the document object
209 $doc->add_attr('@uri', $uri);
# Copy every selected column onto the document as a searchable attribute,
# skipping empty/false values.
211 foreach my $c (@{ $sth->{NAME} }) {
212 $doc->add_attr($c, $row->{$c}) if ($row->{$c});
215 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
217 # add the body text to the document object
218 my $path = $row->{'filepath'};
219 $doc->add_text($path);
# Space out every character so substring searches can match path fragments.
220 $path =~ s/(.)/$1 /g;
221 $doc->add_hidden_text($path);
223 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
225 # register the document object to the database
227 $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
228 } elsif ($hest_node) {
229 $hest_node->put_doc($doc);
# Flush the local index after each chunk; remote nodes persist on their own.
237 $hest_db->sync() if ($index_path);
239 $offset += EST_CHUNK;
# A short (less-than-full) chunk means all pending rows have been drained.
241 } while ($results == EST_CHUNK);
# Guard against a sub-second run producing a divide-by-zero rate.
248 my $dur = (time() - $t) || 1;
249 printf(" [%.2f/s dur: %s]\n",
# Decide whether to (re)build the full-text index up front: forced by -i/-j,
# or triggered automatically when a local index path is configured but the
# index file does not exist yet.  Skipped under -c (fresh DB has no rows).
259 if (($opt{i} || $opt{j} || ($index_path && ! -e $index_path)) && !$opt{c}) {
261 print "force update of HyperEstraier index ";
262 print "importing existing data" unless (-e $index_path);
263 print "by -i flag" if ($opt{i});
264 print "by -j flag" if ($opt{j});
# Fragment of do_index(): index names encode "table_column[_unique]", e.g.
# "files_path" or "shares_hostid_unique"; $unique becomes the literal word
# "unique" in the CREATE statement (or undef for a plain index).
272 my $index = shift || return;
273 my ($table,$col,$unique) = split(/_/, $index);
# Interpolated identifiers are internal constants from the qw() list below,
# not user input, so string-building the DDL here is acceptable.
276 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema bootstrap (-c): create hosts/shares/dvds/backups/files tables plus
# the archive/workflow bookkeeping tables and their FK constraints, then
# build indexes.  The SQL text below lives inside qq{} strings and is left
# byte-for-byte untouched.
279 print "creating tables...\n";
283 ID SERIAL PRIMARY KEY,
284 name VARCHAR(30) NOT NULL,
290 create table shares (
291 ID SERIAL PRIMARY KEY,
292 hostID INTEGER NOT NULL references hosts(id),
293 name VARCHAR(30) NOT NULL,
294 share VARCHAR(200) NOT NULL,
295 localpath VARCHAR(200)
301 ID SERIAL PRIMARY KEY,
302 num INTEGER NOT NULL,
303 name VARCHAR(255) NOT NULL,
309 create table backups (
310 hostID INTEGER NOT NULL references hosts(id),
311 num INTEGER NOT NULL,
312 date integer NOT NULL,
313 type CHAR(4) not null,
314 shareID integer not null references shares(id),
315 size integer not null,
316 PRIMARY KEY(hostID, num, shareID)
320 #do_index('backups_hostid,num_unique');
325 ID SERIAL PRIMARY KEY,
326 shareID INTEGER NOT NULL references shares(id),
327 backupNum INTEGER NOT NULL,
328 name VARCHAR(255) NOT NULL,
329 path VARCHAR(255) NOT NULL,
330 date integer NOT NULL,
331 type INTEGER NOT NULL,
332 size INTEGER NOT NULL
343 username varchar(20) not null,
351 create table archive_backup
353 archive_id int not null,
354 backup_id int not null,
356 primary key(archive_id, backup_id)
361 create table workflows(
363 step_id int not null,
366 username varchar(20),
367 archive_id int not null,
368 running boolean default true,
374 create table workflow_step
376 step_id int not null,
379 stop boolean default false,
385 alter table workflow_step
386 add constraint fk_workflow_next_step
387 foreign key(next_step)
388 references workflow_step(step_id);
392 alter table workflows
393 add constraint fk_workflows_step_id
395 references workflow_step(step_id);
399 alter table workflows
400 add constraint fk_workflows_archive_id
401 foreign key(archive_id)
402 references archive(id);
406 create table workflow_log
408 workflow_id int not null,
409 step_id int not null,
410 date timestamp not null,
412 primary key(workflow_id, step_id)
417 alter table workflow_log
418 add constraint fk_workflow_log_workflow_id
419 foreign key (workflow_id)
420 references workflows(id);
424 alter table workflow_log
425 add constraint fk_workflow_log_step_id
426 foreign key (step_id)
427 references workflow_step(step_id);
430 print "creating indexes:";
432 foreach my $index (qw(
453 ## delete data before inserting ##
456 foreach my $table (qw(files dvds backups shares hosts)) {
458 $dbh->do(qq{ DELETE FROM $table });
465 ## insert new values ##
# Read the configured host list from BackupPC, then prepare the statement
# handles used by the import loop, cached by purpose in the %$sth hash.
# No comments are inserted between the qq{} SQL fragments below.
468 $hosts = $bpc->HostInfoRead();
474 $sth->{insert_hosts} = $dbh->prepare(qq{
475 INSERT INTO hosts (name, IP) VALUES (?,?)
478 $sth->{hosts_by_name} = $dbh->prepare(qq{
479 SELECT ID FROM hosts WHERE name=?
482 $sth->{backups_count} = $dbh->prepare(qq{
485 WHERE hostID=? AND num=? AND shareid=?
488 $sth->{insert_backups} = $dbh->prepare(qq{
489 INSERT INTO backups (hostID, num, date, type, shareid, size)
493 $sth->{insert_files} = $dbh->prepare(qq{
495 (shareID, backupNum, name, path, date, type, size)
496 VALUES (?,?,?,?,?,?,?)
# Main import loop: for each configured host, look up (or insert) its row,
# then walk every backup increment and share, recursing the file tree and
# recording new files, and finally refresh the full-text index.
499 foreach my $host_key (keys %{$hosts}) {
501 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
503 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
# Insert the host on first sight and capture its generated surrogate key.
505 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
506 $sth->{insert_hosts}->execute(
507 $hosts->{$host_key}->{'host'},
508 $hosts->{$host_key}->{'ip'}
511 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
514 print "host ".$hosts->{$host_key}->{'host'}.": ";
516 # get backups for a host
517 my @backups = $bpc->BackupInfoRead($hostname);
518 my $incs = scalar @backups;
519 print "$incs increments\n";
524 foreach my $backup (@backups) {
# -m caps how many increments are imported per host.
527 last if ($opt{m} && $inc_nr > $opt{m});
529 my $backupNum = $backup->{'num'};
530 my @backupShares = ();
# Progress line: host, increment counter, backup number, type, new/total
# file counts, start time, and duration of the original backup run.
532 printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
533 $hosts->{$host_key}->{'host'},
534 $inc_nr, $incs, $backupNum,
535 $backup->{type} || '?',
536 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
537 strftime($t_fmt,localtime($backup->{startTime})),
538 fmt_time($backup->{endTime} - $backup->{startTime})
541 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
542 foreach my $share ($files->shareList($backupNum)) {
546 $shareID = getShareID($share, $hostID, $hostname);
# Skip (host, backupNum, share) triples that were already imported.
548 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
549 my ($count) = $sth->{backups_count}->fetchrow_array();
550 # skip if already in database!
551 next if ($count > 0);
554 print curr_time," ", $share;
# Walk the whole share tree starting at its root ("") and collect counts:
# total files, new files, total dirs, new dirs, cumulative size.
556 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
# Record the backup row; type is truncated to fit the CHAR(4) column.
558 $sth->{insert_backups}->execute(
561 $backup->{'endTime'},
562 substr($backup->{'type'},0,4),
# Avoid a divide-by-zero rate when the share imported in under a second.
570 my $dur = (time() - $t) || 1;
571 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
573 ($size / 1024 / 1024),
# Only touch the full-text index when this share contributed new entries.
578 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
587 print "total duration: ",fmt_time(time() - $start_t),"\n";
# Fragment of getShareID($share, $hostID, $hostname): return the shares.ID
# for this (host, share) pair, inserting the row on first use.  The sub
# declaration line is outside this excerpt.
593 my ($share, $hostID, $hostname) = @_;
595 $sth->{share_id} ||= $dbh->prepare(qq{
596 SELECT ID FROM shares WHERE hostID=? AND name=?
599 $sth->{share_id}->execute($hostID,$share);
601 my ($id) = $sth->{share_id}->fetchrow_array();
# Fast path: the share already exists.
603 return $id if (defined($id));
605 $sth->{insert_share} ||= $dbh->prepare(qq{
607 (hostID,name,share,localpath)
# Synthesize a "hostname/share" display path, collapsing duplicate slashes.
611 my $drop_down = $hostname . '/' . $share;
612 $drop_down =~ s#//+#/#g;
614 $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
615 return $dbh->last_insert_id(undef,undef,'shares',undef);
# Fragment of found_in_db(): memoized (via %$beenThere) check whether a file
# row already exists for (shareID, path, date, size); inserts it when absent.
# The undef slots skip @data fields not used in the lookup.
623 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
# Cache hit: reuse the answer computed earlier in this run.
625 return $beenThere->{$key} if (defined($beenThere->{$key}));
627 $sth->{file_in_db} ||= $dbh->prepare(qq{
629 WHERE shareID = ? and
636 my @param = ($shareID,$path,$date,$size);
637 $sth->{file_in_db}->execute(@param);
638 my $rows = $sth->{file_in_db}->rows;
639 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
# Remember this key so repeated sightings short-circuit above.
641 $beenThere->{$key}++;
# Insert only when the lookup found no existing row.
643 $sth->{'insert_files'}->execute(@data) unless ($rows);
647 ####################################################
648 # recursing through filesystem structure and #
649 # returning flattened files list #
650 ####################################################
# NOTE(review): the ($$$$$$$$) prototype declares 8 scalars but only 7 args
# are unpacked below; prototypes are not argument validation in Perl, but the
# mismatch is worth confirming against the full source.
651 sub recurseDir($$$$$$$$) {
653 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
655 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# Counters: total files, newly inserted files, total dirs, new dirs, bytes.
657 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
662 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
# Attributes of every entry in the current directory of this backup view.
663 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
665 # first, add all the entries in current directory
666 foreach my $path_key (keys %{$filesInBackup}) {
667 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
672 $filesInBackup->{$path_key}->{'relPath'},
673 $filesInBackup->{$path_key}->{'mtime'},
674 $filesInBackup->{$path_key}->{'type'},
675 $filesInBackup->{$path_key}->{'size'}
# Dedup key: identifies a file across backups by (name,) mtime and size.
678 my $key = join(" ", (
682 $filesInBackup->{$path_key}->{'mtime'},
683 $filesInBackup->{$path_key}->{'size'}
# Only count/insert entries not seen in this run and not already in the DB.
687 if (! defined($beenThere->{$key}) && ! ($found = found_in_db($key, @data)) ) {
688 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
690 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
691 $new_dirs++ unless ($found);
692 print STDERR " dir\n" if ($debug >= 2);
694 $new_files++ unless ($found);
695 print STDERR " file\n" if ($debug >= 2);
697 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Directories are queued on @stack for a second pass instead of recursing
# immediately, keeping the DB inserts for this level batched together.
700 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
703 my $full_path = $dir . '/' . $path_key;
704 push @stack, $full_path;
705 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
707 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
719 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Second pass: recurse into each queued subdirectory, accumulating counts.
721 while ( my $dir = shift @stack ) {
722 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
723 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
732 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);