1 #!/usr/local/bin/perl -w
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
16 use constant BPC_FTYPE_DIR => 5;
17 use constant EST_CHUNK => 100000;
# --- single-instance guard ---------------------------------------------
# File::Pid with no args derives a default pid-file path from $0.
# NOTE(review): indirect-object syntax ("new File::Pid") — modern style
# would be File::Pid->new; behavior is the same.
24 my $pidfile = new File::Pid;
# If the pid recorded in the file is still a running process, refuse to
# start a second importer.
26 if (my $pid = $pidfile->running ) {
27 die "$0 already running: $pid\n";
# Pid file exists but records a different (dead) pid: recreate it so it
# now records us.
28 } elsif ($pidfile->pid ne $$) {
30 $pidfile = new File::Pid;
33 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# Timestamp format shared by curr_time() and the per-backup progress lines.
35 my $t_fmt = '%Y-%m-%d %H:%M:%S';
# BackupPC configuration and top-level data directory.
38 my $bpc = BackupPC::Lib->new || die;
39 my %Conf = $bpc->Conf();
40 my $TopDir = $bpc->TopDir();
# The search database DSN is mandatory; the DB user defaults to empty.
43 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
44 my $user = $Conf{SearchUser} || '';
# Optional Hyper Estraier full-text index: SearchLib decodes the config
# value into either a local index path or a remote node URL.
46 my $use_hest = $Conf{HyperEstraierIndex};
47 my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);
# AutoCommit off: the import commits explicitly in batches; RaiseError
# turns DBI failures into exceptions.
49 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line switches; on a bad invocation the block below emits
# the usage text (the indented lines are part of a multi-line string —
# do not add comments inside it) and dies.
53 if ( !getopts("cdm:v:ij", \%opt ) ) {
55 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]
58 -c create database on first use
59 -d delete database before import
60 -m num import just num increments for one host
61 -v num set verbosity (debug) level (default $debug)
62 -i update Hyper Estraier full text index
63 -j update full text, don't check existing files
65 Option -j is variation on -i. It will allow faster initial creation
66 of full-text index from existing database.
73 print "Debug level at $opt{v}\n";
# Fragment of fmt_time($seconds): formats a duration as "[Nh]MM:SS".
# (The sub header and the return of $out are outside this chunk.)
80 my $t = shift || return;
# gmtime on a small duration yields seconds/minutes/hours components.
82 my ($ss,$mm,$hh) = gmtime($t);
# Hours are printed only when non-zero.
83 $out .= "${hh}h" if ($hh);
84 $out .= sprintf("%02d:%02d", $mm,$ss);
# Fragment of curr_time(): current wall-clock time rendered with $t_fmt.
89 return strftime($t_fmt,localtime());
# Signal-handler fragment: announce the shutdown; presumably followed by
# a DB sync/disconnect and exit — TODO confirm against the full handler.
98 print "\nCaught a SIG$sig--syncing database and shutting down\n";
# Install the handler for interactive interrupt and quit.
105 $SIG{'INT'} = \&signal;
106 $SIG{'QUIT'} = \&signal;
# Body of hest_update($host_id, $share_id, $num): push newly imported
# file rows into the Hyper Estraier full-text index, EST_CHUNK rows at a
# time. With all three args defined only that (host, share, backup) is
# indexed; otherwise everything is. (Sub header is outside this chunk.)
110 my ($host_id, $share_id, $num) = @_;
# NOTE(review): relies on && short-circuit for its side effect — the
# warning prints only when -j was given, and $skip_check ends up truthy
# iff -j was set. (Typo "initital" is inside the runtime string.)
112 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
115 print STDERR "HyperEstraier support not enabled in configuration\n";
119 print curr_time," updating HyperEstraier:";
# Local index mode: open (creating if needed) a writable index database
# under $TopDir.
126 print " opening index $use_hest";
128 $hest_db = HyperEstraier::Database->new();
129 $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
# Remote mode: talk to an indexer node over HTTP instead.
131 } elsif ($index_node_url) {
132 $hest_node ||= HyperEstraier::Node->new($index_node_url);
# NOTE(review): hard-coded admin/admin credentials — confirm intended.
133 $hest_node->set_auth('admin', 'admin');
134 print " via node URL";
136 die "don't know how to use HyperEstraier Index $use_hest";
138 print " increment is " . EST_CHUNK . " files:";
# Restrict the query when called for one specific backup.
146 if (defined($host_id) && defined($share_id) && defined($num)) {
153 @data = ( $host_id, $share_id, $num );
# Page through results with LIMIT/OFFSET so memory stays bounded.
156 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
158 my $sth = $dbh->prepare(qq{
162 shares.name AS sname,
163 -- shares.share AS sharename,
164 files.backupnum AS backupnum,
165 -- files.name AS filename,
166 files.path AS filepath,
170 files.shareid AS shareid,
171 backups.date AS backup_date
173 INNER JOIN shares ON files.shareID=shares.ID
174 INNER JOIN hosts ON hosts.ID = shares.hostID
175 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
180 $sth->execute(@data);
181 $results = $sth->rows;
184 print " - no new files\n";
# Local date helper for document attributes (header outside this chunk).
189 my $t = shift || return;
190 my $iso = BackupPC::Lib::timeStamp($t);
# One Hyper Estraier document per result row.
195 while (my $row = $sth->fetchrow_hashref()) {
197 my $fid = $row->{'fid'} || die "no fid?";
# The document URI is derived from the file id, so it is stable across runs.
198 my $uri = 'file:///' . $fid;
# Unless -j was given, skip documents already present in the index
# (uri_to_id returns -1 for "not found").
200 unless ($skip_check) {
201 my $id = ($hest_db || $hest_node)->uri_to_id($uri);
202 next unless ($id == -1);
205 # create a document object
206 my $doc = HyperEstraier::Document->new;
208 # add attributes to the document object
209 $doc->add_attr('@uri', $uri);
# Copy every selected column into the document as a searchable attribute.
211 foreach my $c (@{ $sth->{NAME} }) {
212 $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
215 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
217 # add the body text to the document object
218 my $path = $row->{'filepath'};
219 $doc->add_text($path);
# Space out every character so substring/partial-path queries also match
# via the hidden text.
220 $path =~ s/(.)/$1 /g;
221 $doc->add_hidden_text($path);
223 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
225 # register the document object to the database
227 $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
228 } elsif ($hest_node) {
229 $hest_node->put_doc($doc);
# Flush the local index after each chunk (node mode needs no sync here).
237 $hest_db->sync() if ($index_path);
239 $offset += EST_CHUNK;
# A short (non-full) chunk means all rows have been consumed.
241 } while ($results == EST_CHUNK);
# Guard against division by zero on sub-second runs.
248 my $dur = (time() - $t) || 1;
249 printf(" [%.2f/s dur: %s]\n",
# Decide whether to (re)build the full-text index now: forced by -i/-j,
# or automatic when a local index path is configured but the index file
# does not exist yet — skipped under -c since a fresh DB import will
# index as it goes.
259 if (($opt{i} || $opt{j} || ($index_path && ! -e $TopDir . $index_path)) && !$opt{c}) {
261 print "force update of HyperEstraier index ";
262 print "importing existing data" unless (-e $TopDir . $index_path);
263 print "by -i flag" if ($opt{i});
264 print "by -j flag" if ($opt{j});
# Fragment of the index-creation helper: takes a colon-separated spec
# (table:column[,column][:unique] — exact field order TODO: confirm from
# the full source) and issues CREATE [UNIQUE] INDEX.
272 my $index = shift || return;
273 my ($table,$col,$unique) = split(/:/, $index);
276 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
# NOTE(review): spec fields are interpolated straight into DDL — fine
# for the hard-coded specs this script uses, unsafe for untrusted input.
277 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema bootstrap (-c): create tables, indexes and sequences.
# DDL is PostgreSQL-flavoured (SERIAL, CREATE SEQUENCE, now()).
# The indented lines below are interiors of qq{...} DDL strings — do not
# insert Perl comments among them.
280 print "creating tables...\n";
284 ID SERIAL PRIMARY KEY,
285 name VARCHAR(30) NOT NULL,
289 create table shares (
290 ID SERIAL PRIMARY KEY,
291 hostID INTEGER NOT NULL references hosts(id),
292 name VARCHAR(30) NOT NULL,
293 share VARCHAR(200) NOT NULL
297 ID SERIAL PRIMARY KEY,
298 num INTEGER NOT NULL,
299 name VARCHAR(255) NOT NULL,
303 create table backups (
305 hostID INTEGER NOT NULL references hosts(id),
306 num INTEGER NOT NULL,
307 date integer NOT NULL,
308 type CHAR(4) not null,
309 shareID integer not null references shares(id),
310 size bigint not null,
311 inc_size bigint not null default -1,
312 inc_deleted boolean default false,
313 parts integer not null default 1,
319 shareID INTEGER NOT NULL references shares(id),
320 backupNum INTEGER NOT NULL,
321 name VARCHAR(255) NOT NULL,
322 path VARCHAR(255) NOT NULL,
323 date integer NOT NULL,
324 type INTEGER NOT NULL,
325 size bigint NOT NULL,
329 create table archive (
332 total_size bigint default -1,
334 username varchar(20) not null,
335 date timestamp default now(),
339 create table archive_backup (
340 archive_id int not null references archive(id) on delete cascade,
341 backup_id int not null references backups(id),
342 primary key(archive_id, backup_id)
345 create table archive_burned (
346 archive_id int references archive(id),
347 date timestamp default now(),
348 part int not null default 1,
349 copy int not null default 1,
350 iso_size bigint default -1
353 create table backup_parts (
355 backup_id int references backups(id),
356 part_nr int not null check (part_nr > 0),
357 tar_size bigint not null check (tar_size > 0),
358 size bigint not null check (size > 0),
360 items int not null check (items > 0),
361 date timestamp default now(),
# Index specs here are "table:column[,column]" strings consumed by the
# helper that builds CREATE INDEX statements (qw() list — no comments
# inside it).
366 print "creating indexes: ";
368 foreach my $index (qw(
381 archive_burned:archive_id
382 backup_parts:backup_id,part_nr
# The dvd_nr sequence is allocated independently of any table.
387 print " creating sequence: ";
388 foreach my $seq (qw/dvd_nr/) {
390 $dbh->do( qq{ CREATE SEQUENCE $seq } );
# Full reimport (-d): wipe existing rows, children before parents, so
# foreign-key constraints are not violated.
400 ## delete data before inseting ##
403 foreach my $table (qw(files dvds backups shares hosts)) {
405 $dbh->do(qq{ DELETE FROM $table });
412 ## insert new values ##
# The authoritative host list comes from BackupPC's own config files,
# not from the database.
415 $hosts = $bpc->HostInfoRead();
# Prepared statements cached in %$sth and reused across the import loop.
# (Indented lines are SQL string interiors.)
421 $sth->{insert_hosts} = $dbh->prepare(qq{
422 INSERT INTO hosts (name, IP) VALUES (?,?)
425 $sth->{hosts_by_name} = $dbh->prepare(qq{
426 SELECT ID FROM hosts WHERE name=?
429 $sth->{backups_count} = $dbh->prepare(qq{
432 WHERE hostID=? AND num=? AND shareid=?
435 $sth->{insert_backups} = $dbh->prepare(qq{
436 INSERT INTO backups (hostID, num, date, type, shareid, size)
440 $sth->{insert_files} = $dbh->prepare(qq{
442 (shareID, backupNum, name, path, date, type, size)
443 VALUES (?,?,?,?,?,?,?)
# Main import loop: for every configured host, mirror its backups and
# their file lists into the search database, then refresh the full-text
# index for anything new.
446 foreach my $host_key (keys %{$hosts}) {
448 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# Look the host up by name; insert a row on first sight.
450 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
452 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
453 $sth->{insert_hosts}->execute(
454 $hosts->{$host_key}->{'host'},
455 $hosts->{$host_key}->{'ip'}
458 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
461 print "host ".$hosts->{$host_key}->{'host'}.": ";
463 # get backups for a host
464 my @backups = $bpc->BackupInfoRead($hostname);
465 my $incs = scalar @backups;
466 print "$incs increments\n";
# Walk the increments; -m caps how many are imported per host.
471 foreach my $backup (@backups) {
474 last if ($opt{m} && $inc_nr > $opt{m});
476 my $backupNum = $backup->{'num'};
477 my @backupShares = ();
# Progress line: host, increment position, backup number, type, file
# counts, start time and duration ('?' where metadata is absent).
479 printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
480 $hosts->{$host_key}->{'host'},
481 $inc_nr, $incs, $backupNum,
482 $backup->{type} || '?',
483 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
484 strftime($t_fmt,localtime($backup->{startTime})),
485 fmt_time($backup->{endTime} - $backup->{startTime})
# A BackupPC::View exposes each share's file tree for this backup.
488 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
489 foreach my $share ($files->shareList($backupNum)) {
493 $shareID = getShareID($share, $hostID, $hostname);
# Skip (host, backup, share) triples that were already imported.
495 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
496 my ($count) = $sth->{backups_count}->fetchrow_array();
497 # skip if allready in database!
498 next if ($count > 0);
501 print curr_time," ", $share;
# Recursively register every file of this share/backup; returns totals
# and counts of newly inserted files/dirs plus accumulated size.
503 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
505 $sth->{insert_backups}->execute(
508 $backup->{'endTime'},
# The backups.type column is CHAR(4), hence the truncation.
509 substr($backup->{'type'},0,4),
# Guard against division by zero for very fast shares.
517 my $dur = (time() - $t) || 1;
518 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
520 ($size / 1024 / 1024),
# Only refresh the full-text index when something new was inserted.
525 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
534 print "total duration: ",fmt_time(time() - $start_t),"\n";
# getShareID($share, $hostID, $hostname): return shares.ID for this
# (host, share) pair, inserting a new row on first sight.
# (Sub header is outside this chunk.)
540 my ($share, $hostID, $hostname) = @_;
# Lazily prepared, cached statement handle.
542 $sth->{share_id} ||= $dbh->prepare(qq{
543 SELECT ID FROM shares WHERE hostID=? AND name=?
546 $sth->{share_id}->execute($hostID,$share);
548 my ($id) = $sth->{share_id}->fetchrow_array();
# Fast path: share already known.
550 return $id if (defined($id));
552 $sth->{insert_share} ||= $dbh->prepare(qq{
# Label stored alongside the share: "host/share" with duplicate slashes
# collapsed — presumably a UI drop-down label; verify against the UI code.
558 my $drop_down = $hostname . '/' . $share;
559 $drop_down =~ s#//+#/#g;
561 $sth->{insert_share}->execute($hostID,$share, $drop_down);
562 return $dbh->last_insert_id(undef,undef,'shares',undef);
# found_in_db($key, @data): decide whether a files row already exists,
# memoised per run via %$beenThere; inserts the row when the DB lookup
# finds nothing. (Sub header and return are outside this chunk.)
570 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
# Cache hit: this key was already answered during this run.
572 return $beenThere->{$key} if (defined($beenThere->{$key}));
574 $sth->{file_in_db} ||= $dbh->prepare(qq{
576 WHERE shareID = ? and
# Existence check keyed on share, path, date and size.
583 my @param = ($shareID,$path,$date,$size);
584 $sth->{file_in_db}->execute(@param);
585 my $rows = $sth->{file_in_db}->rows;
586 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
# Remember the key so later lookups in this run short-circuit above.
588 $beenThere->{$key}++;
# Insert only when the lookup found no existing row.
590 $sth->{'insert_files'}->execute(@data) unless ($rows);
594 ####################################################
595 # recursing through filesystem structure and #
596 # and returning flattened files list #
597 ####################################################
# NOTE(review): the prototype declares 8 scalars but only 7 args are
# unpacked below; Perl prototypes don't validate counts anyway —
# consider removing the prototype entirely.
598 sub recurseDir($$$$$$$$) {
600 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
602 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# Counters returned to the caller: totals and newly inserted entries.
604 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
609 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
# Attributes of every entry in the current directory of this backup.
610 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
612 # first, add all the entries in current directory
613 foreach my $path_key (keys %{$filesInBackup}) {
614 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
619 $filesInBackup->{$path_key}->{'relPath'},
620 $filesInBackup->{$path_key}->{'mtime'},
621 $filesInBackup->{$path_key}->{'type'},
622 $filesInBackup->{$path_key}->{'size'}
# Dedup key identifying this file (middle fields of the join are in
# lines not visible in this chunk).
625 my $key = join(" ", (
629 $filesInBackup->{$path_key}->{'mtime'},
630 $filesInBackup->{$path_key}->{'size'}
633 # daylight saving time change offset for 1h
634 my $dst_offset = 60 * 60;
# Alternate keys at mtime +/- 1h so a DST shift between backups does not
# make every file look new.
636 my $key_dst1 = join(" ", (
640 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
641 $filesInBackup->{$path_key}->{'size'}
644 my $key_dst2 = join(" ", (
648 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
649 $filesInBackup->{$path_key}->{'size'}
# Entry is new only if no key variant was seen this run and the DB
# lookup (which also inserts) found nothing.
654 ! defined($beenThere->{$key}) &&
655 ! defined($beenThere->{$key_dst1}) &&
656 ! defined($beenThere->{$key_dst2}) &&
657 ! ($found = found_in_db($key, @data))
659 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
# Count dirs and files separately; new_* counters bump only for entries
# that were not already in the DB.
661 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
662 $new_dirs++ unless ($found);
663 print STDERR " dir\n" if ($debug >= 2);
665 $new_files++ unless ($found);
666 print STDERR " file\n" if ($debug >= 2);
668 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Subdirectories are queued on @stack and walked after this loop rather
# than recursed into immediately.
671 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
674 my $full_path = $dir . '/' . $path_key;
675 push @stack, $full_path;
676 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
678 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
690 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Drain the queue: recurse into each subdirectory and fold its counters
# into ours.
692 while ( my $dir = shift @stack ) {
693 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
694 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
703 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);