1 #!/usr/local/bin/perl -w
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
16 use constant BPC_FTYPE_DIR => 5;
17 use constant EST_CHUNK => 100000;
# --- Single-instance guard via a File::Pid pid file ---
# NOTE(review): this is a partial extract; the elsif body's remaining
# statements (presumably a pid-file write and closing brace) are not
# visible here -- confirm against the full file.
# NOTE(review): `new File::Pid` is indirect-object syntax; the modern
# form is File::Pid->new.
24 my $pidfile = new File::Pid;

# Refuse to start if another copy of this script already holds the pid file.
26 if (my $pid = $pidfile->running ) {
27 die "$0 already running: $pid\n";
# Pid file exists but records a different pid than ours: re-create it.
# NOTE(review): `ne` compares pids as strings; `!=` would be the numeric
# idiom -- confirm intent.
28 } elsif ($pidfile->pid ne $$) {
30 $pidfile = new File::Pid;

# Announce which pid and pid-file path this run is using.
33 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# strftime format used for all timestamps printed by this script.
35 my $t_fmt = '%Y-%m-%d %H:%M:%S';

# BackupPC library handle, its configuration hash, and the pool top directory.
38 my $bpc = BackupPC::Lib->new || die;
39 my %Conf = $bpc->Conf();
40 my $TopDir = $bpc->TopDir();

# Search-database connection settings come from BackupPC's config.pl;
# SearchDSN is mandatory, SearchUser optional.
43 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
44 my $user = $Conf{SearchUser} || '';

# Optional HyperEstraier full-text index: the config value is resolved into
# either a local index path or a node URL (only one is expected to be set).
46 my $use_hest = $Conf{HyperEstraierIndex};
47 my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);

# AutoCommit is off: the script commits explicitly so bulk inserts are batched.
49 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line switches; on bad options the usage text below is shown.
# NOTE(review): the usage lines sit inside a die/heredoc whose delimiters are
# not visible in this extract.
53 if ( !getopts("cdm:v:i", \%opt ) ) {
55 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]

58 -c create database on first use
59 -d delete database before import
60 -m num import just num increments for one host
61 -v num set verbosity (debug) level (default $debug)
62 -i update HyperEstraier full text index

# Echo the effective verbosity when -v was supplied.
68 print "Debug level at $opt{v}\n";
# Body fragment of fmt_time($seconds): formats a duration as "[Hh]MM:SS".
# NOTE(review): the `sub fmt_time {` header and the declaration of $out are
# not visible in this extract.
75 my $t = shift || return;

# gmtime on a small duration yields sec/min/hour fields directly.
77 my ($ss,$mm,$hh) = gmtime($t);
# Hours are only printed when non-zero.
78 $out .= "${hh}h" if ($hh);
79 $out .= sprintf("%02d:%02d", $mm,$ss);

# Body fragment of curr_time(): current local time rendered via $t_fmt.
84 return strftime($t_fmt,localtime());
# Fragment of the signal handler: announce shutdown (the actual database
# sync/disconnect statements are not visible in this extract).
93 print "\nCaught a SIG$sig--syncing database and shutting down\n";

# Install the handler for interactive interrupt and quit signals.
100 $SIG{'INT'} = \&signal;
101 $SIG{'QUIT'} = \&signal;
# Body fragment of hest_update($host_id, $share_id, $num): push newly
# imported file rows into the HyperEstraier full-text index, either via a
# local database ($index_path) or a remote node ($index_node_url).
# Works in EST_CHUNK-sized batches so memory stays bounded.
105 my ($host_id, $share_id, $num) = @_;

# Nothing to do when HyperEstraier is not configured.
108 print STDERR "HyperEstraier support not enabled in configuration\n";

112 print curr_time," updating HyperEstraier:";

# Open the index. Local path: open read-write, creating on first use.
119 print " opening index";
121 $hest_db = HyperEstraier::Database->new();
122 $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
123 print "$index_path directly";
124 } elsif ($index_node_url) {
# Remote node: reuse a cached connection when one exists.
# SECURITY(review): credentials are hard-coded as admin/admin -- these
# should come from configuration.
125 $hest_node ||= HyperEstraier::Node->new($index_node_url);
126 $hest_node->set_auth('admin', 'admin');
127 print "$index_node_url via node URL";
129 die "don't know how to use HyperEstraier Index $use_hest";

131 print " increment is " . EST_CHUNK . " files:";

# When a specific (host, share, backup) triple is given, restrict the query
# to just that backup; the bind values go into @data.
139 if ($host_id && $share_id && $num) {
146 @data = ( $host_id, $share_id, $num );

# Page through results EST_CHUNK rows at a time.
149 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);

# Select file rows joined with their share, host, and backup metadata.
# (Parts of the SELECT list and WHERE clause are not visible in this
# extract.)
151 my $sth = $dbh->prepare(qq{
155 shares.name AS sname,
156 -- shares.share AS sharename,
157 files.backupnum AS backupnum,
158 -- files.name AS filename,
159 files.path AS filepath,
163 files.shareid AS shareid,
164 backups.date AS backup_date
166 INNER JOIN shares ON files.shareID=shares.ID
167 INNER JOIN hosts ON hosts.ID = shares.hostID
168 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID

173 $sth->execute(@data);
174 $results = $sth->rows;
177 print " - no new files\n";

# Local helper fragment: convert an epoch time to BackupPC's timestamp form.
182 my $t = shift || return;
183 my $iso = BackupPC::Lib::timeStamp($t);

188 while (my $row = $sth->fetchrow_hashref()) {
# Each indexed document is addressed by a synthetic file:///<fid> URI.
190 my $fid = $row->{'fid'} || die "no fid?";
191 my $uri = 'file:///' . $fid;

# Skip rows that are already in the index (uri_to_id returns -1 when
# the URI is unknown).
193 my $id = ($hest_db || $hest_node)->uri_to_id($uri);
194 next unless ($id == -1);

196 # create a document object
197 my $doc = HyperEstraier::Document->new;

199 # add attributes to the document object
200 $doc->add_attr('@uri', $uri);

# Copy every non-empty selected column onto the document as an attribute.
202 foreach my $c (@{ $sth->{NAME} }) {
203 $doc->add_attr($c, $row->{$c}) if ($row->{$c});

206 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

208 # add the body text to the document object
209 my $path = $row->{'filepath'};
210 $doc->add_text($path);
# Space out the characters so substring searches also hit the hidden text.
211 $path =~ s/(.)/$1 /g;
212 $doc->add_hidden_text($path);

214 print STDERR $doc->dump_draft,"\n" if ($debug > 1);

216 # register the document object to the database
# PDCLEAN purges any stale version of the document before insertion.
218 $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
219 } elsif ($hest_node) {
220 $hest_node->put_doc($doc);

# Flush the local index after each chunk; node mode syncs server-side.
228 $hest_db->sync() if ($index_path);

230 $offset += EST_CHUNK;
# A full chunk means more rows may remain; a short chunk ends the loop.
232 } while ($results == EST_CHUNK);

# Guard against division by zero when the batch finished within one tick.
239 my $dur = (time() - $t) || 1;
240 printf(" [%.2f/s dur: %s]\n",
# Force a full-text index refresh when -i was given, or when a local index
# path is configured but the index file does not exist yet -- unless -c
# (create database) is in effect.
250 if (($opt{i} || ($index_path && ! -e $index_path)) && !$opt{c}) {
252 print "force update of HyperEstraier index ";
253 print "importing existing data" unless (-e $index_path);
254 print "by -i flag" if ($opt{i});
# Body fragment of do_index($spec): the spec is "table_column[_unique]",
# split on underscores into table, column, and an optional "unique" keyword
# that is spliced into the DDL.
# NOTE(review): the DDL is built by string interpolation; safe only because
# the specs come from the hard-coded qw() list below, never user input.
262 my $index = shift || return;
263 my ($table,$col,$unique) = split(/_/, $index);
266 $dbh->do(qq{ create $unique index $index on $table($col) });
# --- Schema creation (runs under -c): hosts, shares, backups, dvds, files ---
# NOTE(review): the CREATE TABLE statements are fragments here; opening
# `create table hosts (` / `create table dvds|files (` lines and closing
# parens are not visible in this extract.
269 print "creating tables...\n";

# hosts: one row per BackupPC host.
273 ID SERIAL PRIMARY KEY,
274 name VARCHAR(30) NOT NULL,

# shares: a named share on a host, with its on-disk path.
280 create table shares (
281 ID SERIAL PRIMARY KEY,
282 hostID INTEGER NOT NULL references hosts(id),
283 name VARCHAR(30) NOT NULL,
284 share VARCHAR(200) NOT NULL,
285 localpath VARCHAR(200)

# backups: one row per (host, backup number, share) increment.
290 create table backups (
291 hostID INTEGER NOT NULL references hosts(id),
292 num INTEGER NOT NULL,
293 date integer NOT NULL,
294 type CHAR(4) not null,
295 shareID integer not null references shares(id),
296 size integer not null,
297 PRIMARY KEY(hostID, num, shareID)

301 #do_index('backups_hostid,num_unique');

# dvds: archive media that files can be burned to.
305 ID SERIAL PRIMARY KEY,
306 num INTEGER NOT NULL,
307 name VARCHAR(255) NOT NULL,

# files: every file seen in a backup, optionally linked to a dvd.
314 ID SERIAL PRIMARY KEY,
315 shareID INTEGER NOT NULL references shares(id),
316 backupNum INTEGER NOT NULL,
317 name VARCHAR(255) NOT NULL,
318 path VARCHAR(255) NOT NULL,
319 date integer NOT NULL,
320 type INTEGER NOT NULL,
321 size INTEGER NOT NULL,
322 dvdid INTEGER references dvds(id)

# Create the lookup indexes via do_index() from a "table_col[_unique]" list.
326 print "creating indexes:";
328 foreach my $index (qw(
349 ## delete data before inserting (runs under -d) ##
# Tables are cleared child-first so foreign keys are never violated.
352 foreach my $table (qw(files dvds backups shares hosts)) {
354 $dbh->do(qq{ DELETE FROM $table });

361 ## insert new values ##

# Read the host list from BackupPC's own configuration.
364 $hosts = $bpc->HostInfoRead();

# Prepared statements reused throughout the import; all use bind parameters.
370 $sth->{insert_hosts} = $dbh->prepare(qq{
371 INSERT INTO hosts (name, IP) VALUES (?,?)

374 $sth->{hosts_by_name} = $dbh->prepare(qq{
375 SELECT ID FROM hosts WHERE name=?

# Used to detect backups already imported for a (host, num, share) triple.
378 $sth->{backups_count} = $dbh->prepare(qq{
381 WHERE hostID=? AND num=? AND shareid=?

384 $sth->{insert_backups} = $dbh->prepare(qq{
385 INSERT INTO backups (hostID, num, date, type, shareid, size)

389 $sth->{insert_files} = $dbh->prepare(qq{
391 (shareID, backupNum, name, path, date, type, size)
392 VALUES (?,?,?,?,?,?,?)
# --- Main import: walk every configured host, every backup, every share ---
395 foreach my $host_key (keys %{$hosts}) {

397 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

# Look up (or lazily create) the host row and remember its ID.
399 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

401 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
402 $sth->{insert_hosts}->execute(
403 $hosts->{$host_key}->{'host'},
404 $hosts->{$host_key}->{'ip'}

407 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);

410 print "host ".$hosts->{$host_key}->{'host'}.": ";

412 # get backups for a host
413 my @backups = $bpc->BackupInfoRead($hostname);
414 my $incs = scalar @backups;
415 print "$incs increments\n";

420 foreach my $backup (@backups) {

# -m caps how many increments are imported per host.
423 last if ($opt{m} && $inc_nr > $opt{m});

425 my $backupNum = $backup->{'num'};
426 my @backupShares = ();

# Progress line: host, increment counter, backup number, type,
# new/total file counts, start time and duration.
428 printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
429 $hosts->{$host_key}->{'host'},
430 $inc_nr, $incs, $backupNum,
431 $backup->{type} || '?',
432 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
433 strftime($t_fmt,localtime($backup->{startTime})),
434 fmt_time($backup->{endTime} - $backup->{startTime})

# View object gives access to the backup's file tree per share.
437 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
438 foreach my $share ($files->shareList($backupNum)) {

442 $shareID = getShareID($share, $hostID, $hostname);

444 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
445 my ($count) = $sth->{backups_count}->fetchrow_array();
446 # skip if already in database!
447 next if ($count > 0);

450 print curr_time," ", $share;
# Walk the share's tree from its root; returns file/dir totals,
# new-row counts, and the summed size.
452 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
454 $sth->{insert_backups}->execute(
457 $backup->{'endTime'},

# Avoid division by zero in the rate calculation below.
466 my $dur = (time() - $t) || 1;
467 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
469 ($size / 1024 / 1024),

# Only refresh the full-text index when new rows were inserted.
474 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);

483 print "total duration: ",fmt_time(time() - $start_t),"\n";
# Body fragment of getShareID($share, $hostID, $hostname): return the shares
# row ID for this host/share pair, inserting a new row on first sight.
489 my ($share, $hostID, $hostname) = @_;

491 $sth->{share_id} ||= $dbh->prepare(qq{
492 SELECT ID FROM shares WHERE hostID=? AND name=?

495 $sth->{share_id}->execute($hostID,$share);

497 my ($id) = $sth->{share_id}->fetchrow_array();

# Fast path: the share already exists.
499 return $id if (defined($id));

501 $sth->{insert_share} ||= $dbh->prepare(qq{
503 (hostID,name,share,localpath)

# Display path "host/share" with duplicate slashes collapsed.
507 my $drop_down = $hostname . '/' . $share;
508 $drop_down =~ s#//+#/#g;

# localpath is left NULL on insert.
510 $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
511 return $dbh->last_insert_id(undef,undef,'shares',undef);
# Body fragment of found_in_db($key, @data): memoized duplicate check.
# Returns a cached answer from $beenThere when available; otherwise queries
# the files table and inserts the row when it is not yet present.
# NOTE(review): @data appears to be the insert_files bind list; its
# assignment is not visible in this extract -- confirm in the full file.
519 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;

# Per-run cache: avoid re-querying files we have already seen.
521 return $beenThere->{$key} if (defined($beenThere->{$key}));

523 $sth->{file_in_db} ||= $dbh->prepare(qq{
525 WHERE shareID = ? and

532 my @param = ($shareID,$path,$date,$size);
533 $sth->{file_in_db}->execute(@param);
534 my $rows = $sth->{file_in_db}->rows;

535 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

# Remember this key so later lookups short-circuit above.
537 $beenThere->{$key}++;

# Insert only when the SELECT found no matching row.
539 $sth->{'insert_files'}->execute(@data) unless ($rows);
543 ####################################################
544 # recursing through filesystem structure and #
545 # and returning flattened files list #
546 ####################################################
547 sub recurseDir($$$$$$$$) {
549 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
551 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
553 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
558 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
559 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
561 # first, add all the entries in current directory
562 foreach my $path_key (keys %{$filesInBackup}) {
563 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
568 $filesInBackup->{$path_key}->{'relPath'},
569 $filesInBackup->{$path_key}->{'mtime'},
570 $filesInBackup->{$path_key}->{'type'},
571 $filesInBackup->{$path_key}->{'size'}
574 my $key = join(" ", (
578 $filesInBackup->{$path_key}->{'mtime'},
579 $filesInBackup->{$path_key}->{'size'}
583 if (! defined($beenThere->{$key}) && ! ($found = found_in_db($key, @data)) ) {
584 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
586 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
587 $new_dirs++ unless ($found);
588 print STDERR " dir\n" if ($debug >= 2);
590 $new_files++ unless ($found);
591 print STDERR " file\n" if ($debug >= 2);
593 $size += $filesInBackup->{$path_key}->{'size'} || 0;
596 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
599 my $full_path = $dir . '/' . $path_key;
600 push @stack, $full_path;
601 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
603 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
615 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
617 while ( my $dir = shift @stack ) {
618 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
619 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
628 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);