#!/usr/local/bin/perl -w

use lib "__INSTALLDIR__/lib";

use Time::HiRes qw/time/;
use POSIX qw/strftime/;
use BackupPC::SearchLib;

use constant BPC_FTYPE_DIR => 5;
use constant EST_CHUNK => 100000;
# one hour offset to compensate for daylight saving time changes
my $dst_offset = 60 * 60;
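# file mtimes recorded on either side of a DST switch can differ by exactly
# this amount, so duplicate detection below also matches dates +/- 1h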
my $pidfile = new File::Pid;

if (my $pid = $pidfile->running) {
    die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
    $pidfile = new File::Pid;

print STDERR "$0 using pid ", $pidfile->pid, " file ", $pidfile->file, "\n";
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

my $use_hest = $Conf{HyperEstraierIndex};
my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);

my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
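# AutoCommit is off: inserts are grouped into transactions and become
# visible to readers only after an explicit commit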
if ( !getopts("cdm:v:ijf", \%opt) ) {
usage: $0 [-c|-d] [-m num] [-v level] [-i|-j|-f]

    -c      create database on first use
    -d      delete database before import
    -m num  import just num increments for one host
    -v num  set verbosity (debug) level (default $debug)
    -i      update Hyper Estraier full text index
    -j      update full text index, don't check existing files
    -f      don't do anything with full text index
Option -j is a variation on -i. It allows faster initial creation
of the full text index from an existing database.

Option -f will create a database which is out of sync with the full
text index. You will have to re-run $0 with -i to fix it.
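Example: run "$0 -c" for the first import, then re-run "$0 -i"
whenever the full text index has to be brought back in sync.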
80 print "Debug level at $opt{v}\n";
83 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
84 ($use_hest, $index_path, $index_node_url) = (undef, undef, undef);
my $t = shift || return;
my ($ss, $mm, $hh) = gmtime($t);
$out .= "${hh}h" if ($hh);
$out .= sprintf("%02d:%02d", $mm, $ss);
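# e.g. fmt_time(3723) yields "1h02:03"; the hour part is omitted when zero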
return strftime($t_fmt, localtime());

print "\nCaught a SIG$sig -- syncing database and shutting down\n";

$SIG{'INT'}  = \&signal;
$SIG{'QUIT'} = \&signal;
my ($host_id, $share_id, $num) = @_;
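# with -j the warning below is printed and $skip_check is set true in one
# step, relying on print() returning 1 on success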
my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initial import\n";

unless (defined($use_hest)) {
    print STDERR "HyperEstraier support not enabled in configuration\n";
print curr_time, " updating HyperEstraier:";

print " opening index $use_hest";
$hest_db = HyperEstraier::Database->new();
$hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
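# DBWRITER|DBCREAT opens the index read-write and creates it when it
# does not exist yet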
} elsif ($index_node_url) {
    $hest_node ||= HyperEstraier::Node->new($index_node_url);
    $hest_node->set_auth('admin', 'admin');
    print " via node URL";

die "don't know how to use HyperEstraier Index $use_hest";

print " increment is " . EST_CHUNK . " files:";
if (defined($host_id) && defined($share_id) && defined($num)) {
    @data = ( $host_id, $share_id, $num );

my $limit = sprintf('LIMIT ' . EST_CHUNK . ' OFFSET %d', $offset);
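# fetch candidate rows in pages of EST_CHUNK; $offset advances after each
# pass so the whole files table never has to be held in memory at once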
my $sth = $dbh->prepare(qq{
    shares.name AS sname,
    -- shares.share AS sharename,
    files.backupnum AS backupnum,
    -- files.name AS filename,
    files.path AS filepath,
    files.shareid AS shareid,
    backups.date AS backup_date
    INNER JOIN shares ON files.shareID = shares.ID
    INNER JOIN hosts ON hosts.ID = shares.hostID
    INNER JOIN backups ON backups.num = files.backupNum AND backups.hostID = hosts.ID AND backups.shareID = shares.ID
$sth->execute(@data);
$results = $sth->rows;

print " - no new files\n";

my $t = shift || return;
my $iso = BackupPC::Lib::timeStamp($t);
while (my $row = $sth->fetchrow_hashref()) {
    my $fid = $row->{'fid'} || die "no fid?";
    my $uri = 'file:///' . $fid;

    unless ($skip_check) {
        my $id = ($hest_db || $hest_node)->uri_to_id($uri);
        next unless ($id == -1);
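        # uri_to_id() returns -1 for URIs not in the index yet, so only
        # documents that are genuinely new get past this point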
    # create a document object
    my $doc = HyperEstraier::Document->new;

    # add attributes to the document object
    $doc->add_attr('@uri', $uri);

    foreach my $c (@{ $sth->{NAME} }) {
        $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));

    #$doc->add_attr('@cdate', fmt_date($row->{'date'}));

    # add the body text to the document object
    my $path = $row->{'filepath'};
    $doc->add_text($path);
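    # space out every character of the path so each one is indexed as its
    # own token, letting searches match arbitrary substrings of file names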
    $path =~ s/(.)/$1 /g;
    $doc->add_hidden_text($path);

    print STDERR $doc->dump_draft, "\n" if ($debug > 1);

    # register the document object to the database
    $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
    } elsif ($hest_node) {
        $hest_node->put_doc($doc);

$hest_db->sync() if ($index_path);

$offset += EST_CHUNK;

} while ($results == EST_CHUNK);
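# the loop above repeats until a short (partial) chunk signals the last page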
my $dur = (time() - $t) || 1;
printf(" [%.2f/s dur: %s]\n",

if (($opt{i} || $opt{j} || ($index_path && ! -e $TopDir . $index_path)) && !$opt{c}) {
    print "force update of HyperEstraier index ";
    print "importing existing data" unless (-e $TopDir . $index_path);
    print "by -i flag" if ($opt{i});
    print "by -j flag" if ($opt{j});
my $index = shift || return;
my ($table, $col, $unique) = split(/:/, $index);

print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
$dbh->do(qq{ create $unique index $index on $table($col) });
291 print "creating tables...\n";
295 ID SERIAL PRIMARY KEY,
296 name VARCHAR(30) NOT NULL,
300 create table shares (
301 ID SERIAL PRIMARY KEY,
302 hostID INTEGER NOT NULL references hosts(id),
303 name VARCHAR(30) NOT NULL,
304 share VARCHAR(200) NOT NULL
308 ID SERIAL PRIMARY KEY,
309 num INTEGER NOT NULL,
310 name VARCHAR(255) NOT NULL,
create table backups (
    hostID      INTEGER     NOT NULL references hosts(id),
    num         INTEGER     NOT NULL,
    date        integer     NOT NULL,
    type        CHAR(4)     not null,
    shareID     integer     not null references shares(id),
    size        bigint      not null,
    inc_size    bigint      not null default -1,
    inc_deleted boolean     default false,
    parts       integer     not null default 1,

    shareID     INTEGER     NOT NULL references shares(id),
    backupNum   INTEGER     NOT NULL,
    name        VARCHAR(255) NOT NULL,
    path        VARCHAR(255) NOT NULL,
    date        integer     NOT NULL,
    type        INTEGER     NOT NULL,
    size        bigint      NOT NULL,
create table archive (
    total_size  bigint      default -1,
    username    varchar(20) not null,
    date        timestamp   default now(),

create table archive_backup (
    archive_id  int         not null references archive(id) on delete cascade,
    backup_id   int         not null references backups(id),
    primary key(archive_id, backup_id)

create table archive_burned (
    archive_id  int         references archive(id),
    date        timestamp   default now(),
    part        int         not null default 1,
    copy        int         not null default 1,
    iso_size    bigint      default -1

create table backup_parts (
    backup_id   int         references backups(id),
    part_nr     int         not null check (part_nr > 0),
    tar_size    bigint      not null check (tar_size > 0),
    size        bigint      not null check (size > 0),
    items       int         not null check (items > 0),
    date        timestamp   default now(),
377 print "creating indexes: ";
foreach my $index (qw(
    archive_burned:archive_id
    backup_parts:backup_id,part_nr

print " creating sequence: ";
foreach my $seq (qw/dvd_nr/) {
    $dbh->do( qq{ CREATE SEQUENCE $seq } );
## delete data before inserting ##
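# child tables first, parents last, so no foreign key reference is ever
# left dangling while rows are being deleted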
foreach my $table (qw(files dvds backups shares hosts)) {
    $dbh->do(qq{ DELETE FROM $table });

## insert new values ##

$hosts = $bpc->HostInfoRead();
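# HostInfoRead() returns a hash ref describing every host configured in
# BackupPC's hosts file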
$sth->{insert_hosts} = $dbh->prepare(qq{
    INSERT INTO hosts (name, IP) VALUES (?, ?)

$sth->{hosts_by_name} = $dbh->prepare(qq{
    SELECT ID FROM hosts WHERE name=?

$sth->{backups_count} = $dbh->prepare(qq{
    WHERE hostID=? AND num=? AND shareid=?

$sth->{insert_backups} = $dbh->prepare(qq{
    INSERT INTO backups (hostID, num, date, type, shareid, size)
    VALUES (?,?,?,?,?,-1)

$sth->{update_backups_size} = $dbh->prepare(qq{
    UPDATE backups SET size = ?
    WHERE hostID = ? AND num = ? AND date = ? AND type = ? AND shareid = ?

$sth->{insert_files} = $dbh->prepare(qq{
    (shareID, backupNum, name, path, date, type, size)
    VALUES (?,?,?,?,?,?,?)
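# all statement handles are prepared once up front and cached in the $sth
# hash ref so they can be reused for every host and backup below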
my @hosts = keys %{$hosts};

foreach my $host_key (@hosts) {
    my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";

    $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});

    unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
        $sth->{insert_hosts}->execute(
            $hosts->{$host_key}->{'host'},
            $hosts->{$host_key}->{'ip'}

        $hostID = $dbh->last_insert_id(undef, undef, 'hosts', undef);

    print "host ", $hosts->{$host_key}->{'host'}, " [",
        $host_nr, "/", ($#hosts + 1), "]: ";

    # get backups for a host
    my @backups = $bpc->BackupInfoRead($hostname);
    my $incs = scalar @backups;
    print "$incs increments\n";
    foreach my $backup (@backups) {

        last if ($opt{m} && $inc_nr > $opt{m});

        my $backupNum = $backup->{'num'};
        my @backupShares = ();

        printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
            $hosts->{$host_key}->{'host'},
            $inc_nr, $incs, $backupNum,
            $backup->{type} || '?',
            $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
            strftime($t_fmt, localtime($backup->{startTime})),
            fmt_time($backup->{endTime} - $backup->{startTime})
        my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
        foreach my $share ($files->shareList($backupNum)) {

            $shareID = getShareID($share, $hostID, $hostname);

            $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
            my ($count) = $sth->{backups_count}->fetchrow_array();
            # skip if already in database!
            next if ($count > 0);
            print curr_time, " ", $share;

            $sth->{insert_backups}->execute(
                $backup->{'endTime'},
                substr($backup->{'type'}, 0, 4),

            my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
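            # the backup row above was inserted with size -1 as a
            # placeholder; store the real total now that recursion has
            # counted it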
            $sth->{update_backups_size}->execute(
                $backup->{'endTime'},
                substr($backup->{'type'}, 0, 4),

            my $dur = (time() - $t) || 1;
            printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                ($size / 1024 / 1024),

            hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);

print "total duration: ", fmt_time(time() - $start_t), "\n";
my ($share, $hostID, $hostname) = @_;

$sth->{share_id} ||= $dbh->prepare(qq{
    SELECT ID FROM shares WHERE hostID=? AND name=?

$sth->{share_id}->execute($hostID, $share);

my ($id) = $sth->{share_id}->fetchrow_array();

return $id if (defined($id));

$sth->{insert_share} ||= $dbh->prepare(qq{

my $drop_down = $hostname . '/' . $share;
$drop_down =~ s#//+#/#g;
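# collapse any run of slashes so the share is displayed as a clean
# hostname/share path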
$sth->{insert_share}->execute($hostID, $share, $drop_down);
return $dbh->last_insert_id(undef, undef, 'shares', undef);

my ($key, $shareID, undef, $name, $path, $date, undef, $size) = @_;

return $beenThere->{$key} if (defined($beenThere->{$key}));

$sth->{file_in_db} ||= $dbh->prepare(qq{
    WHERE shareID = ? and
    ( date = ? or date = ? or date = ? )
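    -- match the exact date or one shifted a full hour either way,
    -- absorbing daylight saving time changes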
my @param = ($shareID, $path, $size, $date, $date - $dst_offset, $date + $dst_offset);
$sth->{file_in_db}->execute(@param);
my $rows = $sth->{file_in_db}->rows;
print STDERR "## found_in_db($shareID,$path,$date,$size) ", ( $rows ? '+' : '-' ), join(" ", @param), "\n" if ($debug >= 3);

$beenThere->{$key}++;

$sth->{'insert_files'}->execute(@data) unless ($rows);
####################################################
# recurse through the filesystem structure and     #
# return a flattened list of files                 #
####################################################
sub recurseDir($$$$$$$) {
    my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

    print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

    my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0, 0, 0, 0, 0);

    print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
    my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

    # first, add all the entries in current directory
    foreach my $path_key (keys %{$filesInBackup}) {
        print STDERR "# file ", Dumper($filesInBackup->{$path_key}), "\n" if ($debug >= 3);
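        # below, three dedup keys are built for this entry: one from its
        # real mtime and two shifted +/- 1h, so a file re-stamped by a DST
        # change is still recognized as already seen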
            $filesInBackup->{$path_key}->{'relPath'},
            $filesInBackup->{$path_key}->{'mtime'},
            $filesInBackup->{$path_key}->{'type'},
            $filesInBackup->{$path_key}->{'size'}

        my $key = join(" ", (
            $filesInBackup->{$path_key}->{'mtime'},
            $filesInBackup->{$path_key}->{'size'}

        my $key_dst_prev = join(" ", (
            $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
            $filesInBackup->{$path_key}->{'size'}

        my $key_dst_next = join(" ", (
            $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
            $filesInBackup->{$path_key}->{'size'}
            ! defined($beenThere->{$key}) &&
            ! defined($beenThere->{$key_dst_prev}) &&
            ! defined($beenThere->{$key_dst_next}) &&
            ! ($found = found_in_db($key, @data))

            print STDERR "# key: $key [", $beenThere->{$key}, "]" if ($debug >= 2);

            if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
                $new_dirs++ unless ($found);
                print STDERR " dir\n" if ($debug >= 2);

                $new_files++ unless ($found);
                print STDERR " file\n" if ($debug >= 2);

        $size += $filesInBackup->{$path_key}->{'size'} || 0;
        if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {

            my $full_path = $dir . '/' . $path_key;
            push @stack, $full_path;
            print STDERR "### store to stack: $full_path\n" if ($debug >= 3);

            # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
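            # directories found here are queued on @stack and handled only
            # after this listing loop finishes, rather than recursing into
            # each one immediately (the old per-entry recursion is kept
            # above as a comment)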
    print STDERR "## STACK ", join(", ", @stack), "\n" if ($debug >= 2);

    while ( my $dir = shift @stack ) {
        my ($f, $nf, $d, $nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
        print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);

    return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);