1 #!/usr/local/bin/perl -w
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
16 use constant BPC_FTYPE_DIR => 5;
17 use constant EST_CHUNK => 100000;
# Single-instance guard: abort if another copy of this importer is already
# running, as recorded by the File::Pid pid file.
# NOTE(review): "new File::Pid" is indirect-object syntax; File::Pid->new
# is the recommended call form. Left unchanged in this doc-only pass.
24 my $pidfile = new File::Pid;
26 if (my $pid = $pidfile->running ) {
27 die "$0 already running: $pid\n";
28 } elsif ($pidfile->pid ne $$) {
# Pid file exists but belongs to a process that is no longer running --
# re-create it so it records the current process id.
# (Intervening lines are not visible in this listing.)
30 $pidfile = new File::Pid;
33 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# strftime() pattern used for all human-readable timestamps in this script.
35 my $t_fmt = '%Y-%m-%d %H:%M:%S';
# Load the BackupPC configuration (config.pl) and the pool top directory.
38 my $bpc = BackupPC::Lib->new || die;
39 my %Conf = $bpc->Conf();
40 my $TopDir = $bpc->TopDir();
# Search database connection parameters; SearchDSN is mandatory.
43 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
44 my $user = $Conf{SearchUser} || '';
# Hyper Estraier full-text index location: resolved into either a local
# index path or a remote node URL by SearchLib.
46 my $use_hest = $Conf{HyperEstraierIndex};
47 my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);
# AutoCommit is off: inserts below run inside explicit transactions and
# RaiseError turns DBI failures into exceptions.
49 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Command-line options: -c create schema, -d purge existing data,
# -m <num> limit increments per host, -v <level> verbosity,
# -i/-j full-text index update (-j skips the existing-file check).
# On bad options the usage text below (a heredoc/string -- its
# delimiters are outside this listing) is shown.
53 if ( !getopts("cdm:v:ij", \%opt ) ) {
55 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]
58 -c create database on first use
59 -d delete database before import
60 -m num import just num increments for one host
61 -v num set verbosity (debug) level (default $debug)
62 -i update Hyper Estraier full text index
63 -j update full text, don't check existing files
65 Option -j is variation on -i. It will allow faster initial creation
66 of full-text index from existing database.
73 print "Debug level at $opt{v}\n";
# Fragment of a duration formatter (sub header not visible in this
# listing): takes a number of seconds, returns early for 0/undef.
80 my $t = shift || return;
# Split the duration via gmtime so hours/minutes/seconds fall out
# directly; emit hours only when non-zero, then zero-padded MM:SS.
82 my ($ss,$mm,$hh) = gmtime($t);
83 $out .= "${hh}h" if ($hh);
84 $out .= sprintf("%02d:%02d", $mm,$ss);
# Body of a separate helper (presumably curr_time -- header not visible):
# current local time rendered with the global $t_fmt pattern.
89 return strftime($t_fmt,localtime());
# Body of the signal handler (sub header not visible): announce the
# signal, then -- per the message -- sync the database before exiting.
98 print "\nCaught a SIG$sig--syncing database and shutting down\n";
# Install the handler for interactive interrupt and quit so a Ctrl-C
# does not leave a half-committed import behind.
105 $SIG{'INT'} = \&signal;
106 $SIG{'QUIT'} = \&signal;
# hest_update fragment (sub header not visible): pushes file metadata for
# one (host, share, backup) triple -- or everything, when called without
# arguments -- into the Hyper Estraier full-text index, in chunks of
# EST_CHUNK rows.
110 my ($host_id, $share_id, $num) = @_;
# -j mode: skip the per-URI existence lookup below. The && exploits
# print's true return value so $skip_check ends up true-ish under -j.
# NOTE(review): "initital" in this runtime message is a typo in the
# original; left as-is because it is program output, not a comment.
112 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
115 print STDERR "HyperEstraier support not enabled in configuration\n";
119 print curr_time," updating HyperEstraier:";
# Open the index either as a local writable database file...
126 print " opening index $use_hest";
128 $hest_db = HyperEstraier::Database->new();
129 $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
131 } elsif ($index_node_url) {
# ...or as a remote node reached over HTTP.
# NOTE(review): hard-coded 'admin'/'admin' credentials -- confirm these
# match the node configuration; consider making them configurable.
132 $hest_node ||= HyperEstraier::Node->new($index_node_url);
133 $hest_node->set_auth('admin', 'admin');
134 print " via node URL";
136 die "don't know how to use HyperEstraier Index $use_hest";
138 print " increment is " . EST_CHUNK . " files:";
# When a specific backup is given, restrict the query to it via bind
# values; otherwise all rows are indexed.
146 if ($host_id && $share_id && $num) {
153 @data = ( $host_id, $share_id, $num );
# Paged fetch: EST_CHUNK rows at a time, advancing $offset per pass.
156 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
# The SELECT below (qq{} string; parts outside this listing) joins
# files/shares/hosts/backups into one flat row per file.
158 my $sth = $dbh->prepare(qq{
162 shares.name AS sname,
163 -- shares.share AS sharename,
164 files.backupnum AS backupnum,
165 -- files.name AS filename,
166 files.path AS filepath,
170 files.shareid AS shareid,
171 backups.date AS backup_date
173 INNER JOIN shares ON files.shareID=shares.ID
174 INNER JOIN hosts ON hosts.ID = shares.hostID
175 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
180 $sth->execute(@data);
# rows() after execute: row count for this chunk; drives the do/while
# termination test at the bottom.
181 $results = $sth->rows;
184 print " - no new files\n";
# Local date helper fragment: epoch seconds -> BackupPC timestamp string.
189 my $t = shift || return;
190 my $iso = BackupPC::Lib::timeStamp($t);
# One Hyper Estraier document per result row.
195 while (my $row = $sth->fetchrow_hashref()) {
# The file's database id doubles as its unique document URI.
197 my $fid = $row->{'fid'} || die "no fid?";
198 my $uri = 'file:///' . $fid;
# Unless -j, skip URIs that are already present in the index
# (uri_to_id returns -1 for unknown URIs).
200 unless ($skip_check) {
201 my $id = ($hest_db || $hest_node)->uri_to_id($uri);
202 next unless ($id == -1);
205 # create a document object
206 my $doc = HyperEstraier::Document->new;
208 # add attributes to the document object
209 $doc->add_attr('@uri', $uri);
# Copy every non-empty result column as a document attribute, using the
# statement handle's column-name list.
211 foreach my $c (@{ $sth->{NAME} }) {
212 $doc->add_attr($c, $row->{$c}) if ($row->{$c});
215 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
217 # add the body text to the document object
218 my $path = $row->{'filepath'};
219 $doc->add_text($path);
# Space out every character of the path and store it as hidden text --
# presumably so substring searches within path components match; confirm
# against SearchLib's query construction.
220 $path =~ s/(.)/$1 /g;
221 $doc->add_hidden_text($path);
223 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
225 # register the document object to the database
227 $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
228 } elsif ($hest_node) {
229 $hest_node->put_doc($doc);
# Flush the local index after each chunk (remote nodes need no sync).
237 $hest_db->sync() if ($index_path);
239 $offset += EST_CHUNK;
# A short chunk means the result set is exhausted.
241 } while ($results == EST_CHUNK);
# Guard against division by zero in the rate printout below.
248 my $dur = (time() - $t) || 1;
249 printf(" [%.2f/s dur: %s]\n",
# Force a full-text reindex when explicitly requested (-i/-j) or when a
# local index path is configured but the index file does not exist yet.
# Suppressed under -c, where the database itself is being created fresh.
259 if (($opt{i} || $opt{j} || ($index_path && ! -e $index_path)) && !$opt{c}) {
261 print "force update of HyperEstraier index ";
262 print "importing existing data" unless (-e $index_path);
263 print "by -i flag" if ($opt{i});
264 print "by -j flag" if ($opt{j});
# Index-creation helper fragment (sub header not visible): takes a spec
# string "table:column[:unique]" and issues the corresponding CREATE
# [UNIQUE] INDEX. When the third field is absent, $unique is undef and
# interpolates as an empty string (with a warning under -w).
272 my $index = shift || return;
273 my ($table,$col,$unique) = split(/:/, $index);
276 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
# NOTE(review): the full spec string $index -- colons included -- is
# interpolated as the SQL index *name*; confirm the callers pass names
# that remain valid SQL identifiers after this substitution.
277 $dbh->do(qq{ create $unique index $index on $table($col) });
# -c mode: create the search schema. The CREATE TABLE statements below
# are bodies of qq{} strings passed to $dbh->do (delimiters fall outside
# this listing), so no comments are interleaved with the DDL text.
# Schema (PostgreSQL dialect: SERIAL, bigint, now()): hosts -> shares ->
# backups -> files, plus archive/archive_backup for DVD archiving.
280 print "creating tables...\n";
284 ID SERIAL PRIMARY KEY,
285 name VARCHAR(30) NOT NULL,
291 create table shares (
292 ID SERIAL PRIMARY KEY,
293 hostID INTEGER NOT NULL references hosts(id),
294 name VARCHAR(30) NOT NULL,
295 share VARCHAR(200) NOT NULL
301 ID SERIAL PRIMARY KEY,
302 num INTEGER NOT NULL,
303 name VARCHAR(255) NOT NULL,
309 create table backups (
311 hostID INTEGER NOT NULL references hosts(id),
312 num INTEGER NOT NULL,
313 date integer NOT NULL,
314 type CHAR(4) not null,
315 shareID integer not null references shares(id),
316 size bigint not null,
317 inc_size bigint not null default -1,
318 inc_deleted boolean default false,
326 shareID INTEGER NOT NULL references shares(id),
327 backupNum INTEGER NOT NULL,
328 name VARCHAR(255) NOT NULL,
329 path VARCHAR(255) NOT NULL,
330 date integer NOT NULL,
331 type INTEGER NOT NULL,
332 size bigint NOT NULL,
339 create table archive (
342 total_size bigint default -1,
344 username varchar(20) not null,
345 date timestamp default now(),
352 create table archive_backup
354 archive_id int not null references archive(id),
355 backup_id int not null references backups(id),
356 primary key(archive_id, backup_id)
360 print "creating indexes: ";
# Index specs ("table:column[:unique]") listed in a qw() that continues
# past this listing; each is handed to the index-creation helper above.
362 foreach my $index (qw(
379 print " creating sequence: ";
# dvd_nr sequence: allocates DVD numbers for the archive tables.
380 foreach my $seq (qw/dvd_nr/) {
382 $dbh->do( qq{ CREATE SEQUENCE $seq } );
392 ## delete data before inserting ##
# -d mode: empty all tables, children before parents (files first,
# hosts last) so the foreign-key references above are never violated.
395 foreach my $table (qw(files dvds backups shares hosts)) {
397 $dbh->do(qq{ DELETE FROM $table });
404 ## insert new values ##
# Enumerate all hosts known to BackupPC from its hosts file.
407 $hosts = $bpc->HostInfoRead();
# Prepared statements reused across the whole import, cached in %$sth.
# Each qq{} SQL body may continue on lines outside this listing, so no
# comments are interleaved between the prepare() calls.
413 $sth->{insert_hosts} = $dbh->prepare(qq{
414 INSERT INTO hosts (name, IP) VALUES (?,?)
417 $sth->{hosts_by_name} = $dbh->prepare(qq{
418 SELECT ID FROM hosts WHERE name=?
421 $sth->{backups_count} = $dbh->prepare(qq{
424 WHERE hostID=? AND num=? AND shareid=?
427 $sth->{insert_backups} = $dbh->prepare(qq{
428 INSERT INTO backups (hostID, num, date, type, shareid, size)
432 $sth->{insert_files} = $dbh->prepare(qq{
434 (shareID, backupNum, name, path, date, type, size)
435 VALUES (?,?,?,?,?,?,?)
438 foreach my $host_key (keys %{$hosts}) {
# Every host entry must carry a 'host' name; die loudly otherwise.
440 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
442 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
# Look up the host's row id; insert the host (name + ip) on first sight
# and recover the generated SERIAL id via last_insert_id.
444 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
445 $sth->{insert_hosts}->execute(
446 $hosts->{$host_key}->{'host'},
447 $hosts->{$host_key}->{'ip'}
450 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
453 print "host ".$hosts->{$host_key}->{'host'}.": ";
455 # get backups for a host
456 my @backups = $bpc->BackupInfoRead($hostname);
457 my $incs = scalar @backups;
458 print "$incs increments\n";
# Walk every backup increment of the current host, oldest first as
# returned by BackupInfoRead.
463 foreach my $backup (@backups) {
# -m caps how many increments are imported per host.
466 last if ($opt{m} && $inc_nr > $opt{m});
468 my $backupNum = $backup->{'num'};
469 my @backupShares = ();
# Progress line: host, increment counter, backup number, type,
# new/total file counts, start time and duration.
471 printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
472 $hosts->{$host_key}->{'host'},
473 $inc_nr, $incs, $backupNum,
474 $backup->{type} || '?',
475 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
476 strftime($t_fmt,localtime($backup->{startTime})),
477 fmt_time($backup->{endTime} - $backup->{startTime})
# BackupPC::View resolves the merged (filled) view of this backup; the
# trailing 1 presumably selects that merged view -- confirm against
# BackupPC::View->new's signature.
480 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
481 foreach my $share ($files->shareList($backupNum)) {
485 $shareID = getShareID($share, $hostID, $hostname);
# Idempotency: a (host, backupNum, share) row already present in the
# backups table means this increment was imported earlier.
487 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
488 my ($count) = $sth->{backups_count}->fetchrow_array();
489 # skip if already in database!
490 next if ($count > 0);
493 print curr_time," ", $share;
# Recursively insert all files of this share; returns total/new file
# and dir counts plus the cumulative size.
495 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
# Record the increment itself (type truncated to the CHAR(4) column).
497 $sth->{insert_backups}->execute(
500 $backup->{'endTime'},
501 substr($backup->{'type'},0,4),
# Per-share statistics; $dur floored to 1 to avoid division by zero.
509 my $dur = (time() - $t) || 1;
510 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
512 ($size / 1024 / 1024),
# Push the freshly imported rows into the full-text index, but only
# when something new was actually inserted.
517 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
526 print "total duration: ",fmt_time(time() - $start_t),"\n";
# getShareID fragment (sub header not visible): return the shares.ID for
# (hostID, share name), inserting the share on first sight.
532 my ($share, $hostID, $hostname) = @_;
# Lazily prepared lookup, cached in %$sth across calls.
534 $sth->{share_id} ||= $dbh->prepare(qq{
535 SELECT ID FROM shares WHERE hostID=? AND name=?
538 $sth->{share_id}->execute($hostID,$share);
# Fast path: the share already exists.
540 my ($id) = $sth->{share_id}->fetchrow_array();
542 return $id if (defined($id));
544 $sth->{insert_share} ||= $dbh->prepare(qq{
# Display label "hostname/share" with duplicate slashes collapsed
# (share names themselves usually begin with '/').
550 my $drop_down = $hostname . '/' . $share;
551 $drop_down =~ s#//+#/#g;
553 $sth->{insert_share}->execute($hostID,$share, $drop_down);
# Return the SERIAL id generated for the new shares row.
554 return $dbh->last_insert_id(undef,undef,'shares',undef);
# found_in_db fragment (sub header not visible): decide whether a file
# row already exists, using the in-memory $beenThere cache first and the
# database second; inserts the row when it is genuinely new.
# Argument list mirrors the @data layout built in recurseDir; the two
# undef slots skip fields this check does not need.
562 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
# Cache hit: this exact (path, mtime, size) key was already handled.
564 return $beenThere->{$key} if (defined($beenThere->{$key}));
# Lazily prepared duplicate check (qq{} SQL continues outside this
# listing), matching on shareID plus the fields below.
566 $sth->{file_in_db} ||= $dbh->prepare(qq{
568 WHERE shareID = ? and
575 my @param = ($shareID,$path,$date,$size);
576 $sth->{file_in_db}->execute(@param);
577 my $rows = $sth->{file_in_db}->rows;
578 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
# Remember this key so repeat sightings short-circuit above.
580 $beenThere->{$key}++;
# Insert only when the database did not already have the row.
# NOTE(review): @data is not assigned in the visible lines -- presumably
# a sub-scoped copy of the argument list; confirm in the full source.
582 $sth->{'insert_files'}->execute(@data) unless ($rows);
586 ####################################################
587 # recursing through filesystem structure and #
588 # and returning flattened files list #
589 ####################################################
590 sub recurseDir($$$$$$$$) {
592 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
594 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
596 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
601 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
602 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
604 # first, add all the entries in current directory
605 foreach my $path_key (keys %{$filesInBackup}) {
606 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
611 $filesInBackup->{$path_key}->{'relPath'},
612 $filesInBackup->{$path_key}->{'mtime'},
613 $filesInBackup->{$path_key}->{'type'},
614 $filesInBackup->{$path_key}->{'size'}
617 my $key = join(" ", (
621 $filesInBackup->{$path_key}->{'mtime'},
622 $filesInBackup->{$path_key}->{'size'}
626 if (! defined($beenThere->{$key}) && ! ($found = found_in_db($key, @data)) ) {
627 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
629 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
630 $new_dirs++ unless ($found);
631 print STDERR " dir\n" if ($debug >= 2);
633 $new_files++ unless ($found);
634 print STDERR " file\n" if ($debug >= 2);
636 $size += $filesInBackup->{$path_key}->{'size'} || 0;
639 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
642 my $full_path = $dir . '/' . $path_key;
643 push @stack, $full_path;
644 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
646 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
658 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
660 while ( my $dir = shift @stack ) {
661 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
662 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
671 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);