1 #!/usr/local/bin/perl -w
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
# File-type code 5 marks a directory in BackupPC attrib data
# (compared against entry type in recurseDir below).
16 use constant BPC_FTYPE_DIR => 5;
# Batch size: number of file rows fetched/indexed per full-text chunk.
17 use constant EST_CHUNK => 100000;
19 # daylight saving time change offset for 1h
# One hour in seconds; used to match mtimes shifted by a DST change
# when checking whether a file is already in the database.
20 my $dst_offset = 60 * 60;
# Single-instance guard via a pid file: refuse to start when another
# copy of this script is already running.
# NOTE(review): indirect object syntax ("new File::Pid") — prefer
# File::Pid->new in new code.
27 my $pidfile = new File::Pid;
29 if (my $pid = $pidfile->running ) {
30 die "$0 already running: $pid\n";
# Pid file exists but records a different pid — presumably stale;
# re-create it for the current process.  [lines elided in this listing]
31 } elsif ($pidfile->pid ne $$) {
33 $pidfile = new File::Pid;
36 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# strftime pattern used for all printable timestamps below.
38 my $t_fmt = '%Y-%m-%d %H:%M:%S';
# BackupPC environment: configuration hash and pool top directory.
41 my $bpc = BackupPC::Lib->new || die;
42 my %Conf = $bpc->Conf();
43 my $TopDir = $bpc->TopDir();
# Search database connection parameters come from config.pl;
# SearchDSN is mandatory, SearchUser optional.
46 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
47 my $user = $Conf{SearchUser} || '';
# Full-text index target: either a local index path under $TopDir or a
# remote node URL, decoded from the HyperEstraierIndex config value.
49 my $use_hest = $Conf{HyperEstraierIndex};
50 my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);
# AutoCommit off: the import batches work into explicit transactions.
52 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Command-line parsing; on invalid options the usage text below is
# emitted.  The usage body is part of a runtime string (heredoc) —
# several lines are elided in this listing, so no comments are
# interleaved inside it.
56 if ( !getopts("cdm:v:ijf", \%opt ) ) {
58 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
61 -c create database on first use
62 -d delete database before import
63 -m num import just num increments for one host
64 -v num set verbosity (debug) level (default $debug)
65 -i update Hyper Estraier full text index
66 -j update full text, don't check existing files
67 -f don't do anything with full text index
69 Option -j is variation on -i. It will allow faster initial creation
70 of full-text index from existing database.
72 Option -f will create database which is out of sync with full text index. You
73 will have to re-run $0 with -i to fix it.
# Report chosen debug level; -f disables all full-text index updates,
# leaving the database out of sync with the index until -j is re-run.
80 print "Debug level at $opt{v}\n";
83 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
# Clearing all three variables makes every index code path a no-op.
84 ($use_hest, $index_path, $index_node_url) = (undef, undef, undef);
# fmt_time fragment: format a duration in seconds as [Hh]MM:SS.
# (sub header/footer elided in this listing)
90 my $t = shift || return;
92 my ($ss,$mm,$hh) = gmtime($t);
# Hours component only printed when non-zero.
93 $out .= "${hh}h" if ($hh);
94 $out .= sprintf("%02d:%02d", $mm,$ss);
# curr_time fragment: current local time rendered with $t_fmt.
99 return strftime($t_fmt,localtime());
# Signal-handler fragment: announce shutdown; the elided lines
# presumably sync/commit before exiting.  INT and QUIT both route here.
108 print "\nCaught a SIG$sig--syncing database and shutting down\n";
115 $SIG{'INT'} = \&signal;
116 $SIG{'QUIT'} = \&signal;
# hest_update fragment: push newly imported files into the HyperEstraier
# full-text index.  With all three args defined, only that
# (host, share, backup) is indexed; the elided else-branch presumably
# indexes everything.  (sub header elided in this listing)
120 my ($host_id, $share_id, $num) = @_;
# NOTE(review): relies on print's return value (1) to set the flag —
# fragile idiom; also "initital" typo in the runtime message (left
# unchanged here since it is program output, not a comment).
122 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
124 unless (defined($use_hest)) {
125 print STDERR "HyperEstraier support not enabled in configuration\n";
130 return unless($use_hest);
132 print curr_time," updating HyperEstraier:";
# Local index path: open (create if needed) a writable database under
# $TopDir; otherwise talk to a remote index node over HTTP.
139 print " opening index $use_hest";
141 $hest_db = HyperEstraier::Database->new();
142 $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
144 } elsif ($index_node_url) {
145 $hest_node ||= HyperEstraier::Node->new($index_node_url);
# NOTE(review): hard-coded admin/admin credentials — should come from
# configuration.
146 $hest_node->set_auth('admin', 'admin');
147 print " via node URL";
149 die "don't know how to use HyperEstraier Index $use_hest";
151 print " increment is " . EST_CHUNK . " files:";
# Bind parameters for the per-backup WHERE clause built below.
159 if (defined($host_id) && defined($share_id) && defined($num)) {
166 @data = ( $host_id, $share_id, $num );
171 my $sth = $dbh->prepare(qq{
175 shares.name AS sname,
176 -- shares.share AS sharename,
177 files.backupnum AS backupnum,
178 -- files.name AS filename,
179 files.path AS filepath,
183 files.shareid AS shareid,
184 backups.date AS backup_date
186 INNER JOIN shares ON files.shareID=shares.ID
187 INNER JOIN hosts ON hosts.ID = shares.hostID
188 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
193 $sth->execute(@data);
194 $results = $sth->rows;
197 print " - no new files\n";
# fmt_date fragment: convert an epoch time to the DB timestamp form.
202 my $t = shift || return;
203 my $iso = BackupPC::Lib::timeStamp($t);
# Walk the chunk and register one HyperEstraier document per file row.
208 while (my $row = $sth->fetchrow_hashref()) {
210 my $fid = $row->{'fid'} || die "no fid?";
# Document URI is derived from the numeric file id.
211 my $uri = 'file:///' . $fid;
# Unless -j (skip check) was given, skip URIs already present in the
# index; uri_to_id returns -1 for an unknown URI.
213 unless ($skip_check) {
214 my $id = ($hest_db || $hest_node)->uri_to_id($uri);
215 next unless ($id == -1);
218 # create a document object
219 my $doc = HyperEstraier::Document->new;
221 # add attributes to the document object
222 $doc->add_attr('@uri', $uri);
# Copy every selected column of the row into a document attribute.
224 foreach my $c (@{ $sth->{NAME} }) {
225 $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
228 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
230 # add the body text to the document object
231 my $path = $row->{'filepath'};
232 $doc->add_text($path);
# Space out each character so sub-path fragments are also searchable.
233 $path =~ s/(.)/$1 /g;
234 $doc->add_hidden_text($path);
236 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
238 # register the document object to the database
# PDCLEAN replaces any stale document with the same URI.
240 $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
241 } elsif ($hest_node) {
242 $hest_node->put_doc($doc);
# Flush the local index after each chunk, then advance the page.
250 $hest_db->sync() if ($index_path);
252 $offset += EST_CHUNK;
# A short (non-full) chunk means all rows have been consumed.
254 } while ($results == EST_CHUNK);
# "|| 1" guards the rate computation against a sub-second duration.
261 my $dur = (time() - $t) || 1;
262 printf(" [%.2f/s dur: %s]\n",
# Decide whether to (re)build the full-text index now: forced by -i or
# -j, or automatically when a configured local index file does not yet
# exist — but never in the same run as -c (fresh database creation).
272 if (($opt{i} || $opt{j} || ($index_path && ! -e $TopDir . $index_path)) && !$opt{c}) {
274 print "force update of HyperEstraier index ";
275 print "importing existing data" unless (-e $TopDir . $index_path);
276 print "by -i flag" if ($opt{i});
277 print "by -j flag" if ($opt{j});
# Index-creation helper fragment: index specs have the form
# "name_table:col[:unique]"; the third field, when present, makes the
# index unique.  (sub header elided in this listing)
285 my $index = shift || return;
286 my ($table,$col,$unique) = split(/:/, $index);
289 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
# NOTE(review): $unique/$index/$table are interpolated raw into SQL —
# acceptable only while the specs are hard-coded in this script.
290 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema creation (-c): hosts, shares, dvds, backups, files, archive,
# archive_backup, archive_burned and backup_parts tables.  The DDL
# below lives inside qq{} strings with several lines elided in this
# listing, so no comments are interleaved within the SQL text.
293 print "creating tables...\n";
297 ID SERIAL PRIMARY KEY,
298 name VARCHAR(30) NOT NULL,
302 create table shares (
303 ID SERIAL PRIMARY KEY,
304 hostID INTEGER NOT NULL references hosts(id),
305 name VARCHAR(30) NOT NULL,
306 share VARCHAR(200) NOT NULL
310 ID SERIAL PRIMARY KEY,
311 num INTEGER NOT NULL,
312 name VARCHAR(255) NOT NULL,
316 create table backups (
318 hostID INTEGER NOT NULL references hosts(id),
319 num INTEGER NOT NULL,
320 date integer NOT NULL,
321 type CHAR(4) not null,
322 shareID integer not null references shares(id),
323 size bigint not null,
324 inc_size bigint not null default -1,
325 inc_deleted boolean default false,
326 parts integer not null default 1,
332 shareID INTEGER NOT NULL references shares(id),
333 backupNum INTEGER NOT NULL,
334 name VARCHAR(255) NOT NULL,
335 path VARCHAR(255) NOT NULL,
336 date integer NOT NULL,
337 type INTEGER NOT NULL,
338 size bigint NOT NULL,
342 create table archive (
345 total_size bigint default -1,
347 username varchar(20) not null,
348 date timestamp default now(),
352 create table archive_backup (
353 archive_id int not null references archive(id) on delete cascade,
354 backup_id int not null references backups(id),
355 primary key(archive_id, backup_id)
358 create table archive_burned (
359 archive_id int references archive(id),
360 date timestamp default now(),
361 part int not null default 1,
362 copy int not null default 1,
363 iso_size bigint default -1
366 create table backup_parts (
368 backup_id int references backups(id),
369 part_nr int not null check (part_nr > 0),
370 tar_size bigint not null check (tar_size > 0),
371 size bigint not null check (size > 0),
373 items int not null check (items > 0),
374 date timestamp default now(),
# Create the lookup indexes (specs in table:column[,column][:unique]
# form) and the dvd_nr sequence used for numbering burned media.
379 print "creating indexes: ";
381 foreach my $index (qw(
394 archive_burned:archive_id
395 backup_parts:backup_id,part_nr
400 print " creating sequence: ";
401 foreach my $seq (qw/dvd_nr/) {
# NOTE(review): sequence name interpolated raw into SQL; safe only
# while it comes from the hard-coded qw// list above.
403 $dbh->do( qq{ CREATE SEQUENCE $seq } );
413 ## delete data before inserting ##
# -d: wipe existing rows; order (files first, hosts last) appears
# chosen to satisfy foreign-key constraints — confirm against schema.
416 foreach my $table (qw(files dvds backups shares hosts)) {
418 $dbh->do(qq{ DELETE FROM $table });
425 ## insert new values ##
# Host list as BackupPC itself knows it.
428 $hosts = $bpc->HostInfoRead();
# Prepared statements, cached in %$sth and reused by the import loop.
# Each qq{} SQL string below has lines elided in this listing.
434 $sth->{insert_hosts} = $dbh->prepare(qq{
435 INSERT INTO hosts (name, IP) VALUES (?,?)
438 $sth->{hosts_by_name} = $dbh->prepare(qq{
439 SELECT ID FROM hosts WHERE name=?
442 $sth->{backups_count} = $dbh->prepare(qq{
445 WHERE hostID=? AND num=? AND shareid=?
448 $sth->{insert_backups} = $dbh->prepare(qq{
449 INSERT INTO backups (hostID, num, date, type, shareid, size)
450 VALUES (?,?,?,?,?,-1)
453 $sth->{update_backups_size} = $dbh->prepare(qq{
454 UPDATE backups SET size = ?
455 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
458 $sth->{insert_files} = $dbh->prepare(qq{
460 (shareID, backupNum, name, path, date, type, size)
461 VALUES (?,?,?,?,?,?,?)
464 my @hosts = keys %{$hosts};
# Main import loop: for every configured host, look it up (inserting it
# on first sight), then walk each backup increment and each share,
# importing the file tree and updating the full-text index.
467 foreach my $host_key (@hosts) {
469 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
471 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
# Unknown host: insert it and capture the generated id.
473 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
474 $sth->{insert_hosts}->execute(
475 $hosts->{$host_key}->{'host'},
476 $hosts->{$host_key}->{'ip'}
479 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
483 print "host ", $hosts->{$host_key}->{'host'}, " [",
484 $host_nr, "/", ($#hosts + 1), "]: ";
486 # get backups for a host
487 my @backups = $bpc->BackupInfoRead($hostname);
488 my $incs = scalar @backups;
489 print "$incs increments\n";
494 foreach my $backup (@backups) {
# -m caps the number of increments imported per host.
497 last if ($opt{m} && $inc_nr > $opt{m});
499 my $backupNum = $backup->{'num'};
500 my @backupShares = ();
502 printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
503 $hosts->{$host_key}->{'host'},
504 $inc_nr, $incs, $backupNum,
505 $backup->{type} || '?',
506 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
507 strftime($t_fmt,localtime($backup->{startTime})),
508 fmt_time($backup->{endTime} - $backup->{startTime})
# View over this host's backups; iterate the shares in this increment.
511 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
512 foreach my $share ($files->shareList($backupNum)) {
516 $shareID = getShareID($share, $hostID, $hostname);
518 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
519 my ($count) = $sth->{backups_count}->fetchrow_array();
520 # skip if already in database!
521 next if ($count > 0);
524 print curr_time," ", $share;
# Insert the backup row with size -1; the real size is set after the
# share's tree has been walked.
526 $sth->{insert_backups}->execute(
529 $backup->{'endTime'},
530 substr($backup->{'type'},0,4),
# Recursively import the file tree; returns file/dir counts and size.
534 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
536 $sth->{update_backups_size}->execute(
540 $backup->{'endTime'},
541 substr($backup->{'type'},0,4),
# "|| 1" prevents division by zero for sub-second imports.
548 my $dur = (time() - $t) || 1;
549 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
551 ($size / 1024 / 1024),
# Only touch the full-text index when this backup added new entries.
556 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
565 print "total duration: ",fmt_time(time() - $start_t),"\n";
# getShareID fragment: return shares.ID for ($hostID, $share),
# inserting a new row on first sight.  (sub header elided in listing)
571 my ($share, $hostID, $hostname) = @_;
573 $sth->{share_id} ||= $dbh->prepare(qq{
574 SELECT ID FROM shares WHERE hostID=? AND name=?
577 $sth->{share_id}->execute($hostID,$share);
579 my ($id) = $sth->{share_id}->fetchrow_array();
# Fast path: share already known.
581 return $id if (defined($id));
583 $sth->{insert_share} ||= $dbh->prepare(qq{
# Display label: "hostname/share" with duplicate slashes collapsed.
589 my $drop_down = $hostname . '/' . $share;
590 $drop_down =~ s#//+#/#g;
592 $sth->{insert_share}->execute($hostID,$share, $drop_down);
593 return $dbh->last_insert_id(undef,undef,'shares',undef);
# found_in_db fragment: check whether a file row already exists (first
# via the in-memory $beenThere cache, then the DB) and insert it when
# absent.  Dates match exactly or +/- $dst_offset (one hour) to absorb
# DST-shifted mtimes.  (sub header elided in this listing)
601 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
603 return $beenThere->{$key} if (defined($beenThere->{$key}));
605 $sth->{file_in_db} ||= $dbh->prepare(qq{
607 WHERE shareID = ? and
610 ( date = ? or date = ? or date = ? )
# Bind order must mirror the SQL above: share, path, size, then the
# three candidate dates.
614 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
615 $sth->{file_in_db}->execute(@param);
616 my $rows = $sth->{file_in_db}->rows;
617 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
# Remember this key so repeat lookups are answered from memory.
619 $beenThere->{$key}++;
621 $sth->{'insert_files'}->execute(@data) unless ($rows);
625 ####################################################
626 # recursing through filesystem structure and       #
627 # returning flattened files list                   #
628 ####################################################
# Walk one directory of a backup share: import every entry in it, then
# recurse into subdirectories via an explicit stack.  Returns
# (nr_files, new_files, nr_dirs, new_dirs, total_size); the "new"
# counters cover entries not previously present in the database.
# NOTE(review): the prototype declares 8 scalar args but only 7 are
# unpacked — prototypes should not be used for arg checking anyway.
629 sub recurseDir($$$$$$$$) {
631 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
633 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
635 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
640 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
# Attributes of every entry in the current directory.
641 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
643 # first, add all the entries in current directory
644 foreach my $path_key (keys %{$filesInBackup}) {
645 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
# Row data for the files table (some fields elided in this listing).
650 $filesInBackup->{$path_key}->{'relPath'},
651 $filesInBackup->{$path_key}->{'mtime'},
652 $filesInBackup->{$path_key}->{'type'},
653 $filesInBackup->{$path_key}->{'size'}
# Three dedupe cache keys: exact mtime plus one hour either side, to
# absorb DST shifts (mirrors the date matching in found_in_db).
656 my $key = join(" ", (
660 $filesInBackup->{$path_key}->{'mtime'},
661 $filesInBackup->{$path_key}->{'size'}
664 my $key_dst_prev = join(" ", (
668 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
669 $filesInBackup->{$path_key}->{'size'}
672 my $key_dst_next = join(" ", (
676 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
677 $filesInBackup->{$path_key}->{'size'}
# Count an entry as new only when no cache key matched and the DB
# lookup (which also inserts when absent) found nothing.
682 ! defined($beenThere->{$key}) &&
683 ! defined($beenThere->{$key_dst_prev}) &&
684 ! defined($beenThere->{$key_dst_next}) &&
685 ! ($found = found_in_db($key, @data))
687 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
689 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
690 $new_dirs++ unless ($found);
691 print STDERR " dir\n" if ($debug >= 2);
693 $new_files++ unless ($found);
694 print STDERR " file\n" if ($debug >= 2);
696 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Subdirectories are queued and recursed into after this level.
699 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
702 my $full_path = $dir . '/' . $path_key;
703 push @stack, $full_path;
704 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
706 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
718 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Drain the stack, accumulating each subdirectory's counters (the
# accumulation lines are elided in this listing).
720 while ( my $dir = shift @stack ) {
721 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
722 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
731 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);