1 #!/usr/local/bin/perl -w
# NOTE(review): this is a numbered listing with elided lines -- source lines
# are missing between the visible ones; added comments are review annotations.
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
# File-type code for directories; presumably mirrors BackupPC's internal
# attrib type codes -- TODO confirm against BackupPC::Attrib.
17 use constant BPC_FTYPE_DIR => 5;
# Batch size (rows per query page) when feeding the full-text indexer.
18 use constant EST_CHUNK => 4096;
20 # daylight saving time change offset of 1h (in seconds)
21 my $dst_offset = 60 * 60;
# Build a pidfile name from this script's absolute path so that concurrent
# runs of the same installed copy exclude each other.
28 my $pid_path = abs_path($0);
# collapse all non-word characters so the path becomes a safe flat filename
29 $pid_path =~ s/\W+/_/g;
# NOTE(review): indirect-object syntax ("new File::Pid") -- File::Pid->new
# would be the safer modern spelling.
31 my $pidfile = new File::Pid({
32 file => "/tmp/$pid_path",
# Refuse to start while another live instance holds the pidfile.
35 if (my $pid = $pidfile->running ) {
36 die "$0 already running: $pid\n";
37 } elsif ($pidfile->pid ne $$) {
# pidfile exists but records a different (dead) pid -- take it over
39 $pidfile = new File::Pid;
41 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# strftime format used for all timestamps printed by this script
44 my $t_fmt = '%Y-%m-%d %H:%M:%S';
47 my $bpc = BackupPC::Lib->new || die;
48 my %Conf = $bpc->Conf();
49 my $TopDir = $bpc->TopDir();
# Search database DSN is mandatory; user defaults to empty (e.g. ident auth).
52 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
53 my $user = $Conf{SearchUser} || '';
# Optional Hyper Estraier node URL; undef disables full-text indexing.
55 my $index_node_url = $Conf{HyperEstraierIndex};
# RaiseError so DB failures die loudly; AutoCommit off -- commits are issued
# explicitly elsewhere (elided from this listing).
57 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line switches; on bad usage the (elided) code prints the
# help text below and exits.  Note: the help text is a runtime string --
# do not annotate inside it.
61 if ( !getopts("cdm:v:ijfq", \%opt ) ) {
63 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
66 -c create database on first use
67 -d delete database before import
68 -m num import just num increments for one host
69 -v num set verbosity (debug) level (default $debug)
70 -i update Hyper Estraier full text index
71 -j update full text, don't check existing files
72 -f don't do anything with full text index
73 -q be quiet for hosts without changes
75 Option -j is variation on -i. It will allow faster initial creation
76 of full-text index from existing database.
78 Option -f will create database which is out of sync with full text index. You
79 will have to re-run $0 with -i to fix it.
86 print "Debug level at $opt{v}\n";
# -f: drop the index URL so no full-text updates happen this run; the
# database will then be ahead of the index until re-synced with -j.
89 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
90 $index_node_url = undef;
# fmt_time (sub header elided): format a duration in seconds as [Hh]MM:SS,
# e.g. 7387 -> "2h03:07"; returns nothing for a zero/undef duration.
96 my $t = shift || return;
98 my ($ss,$mm,$hh) = gmtime($t);
# hours are printed only when non-zero
99 $out .= "${hh}h" if ($hh);
100 $out .= sprintf("%02d:%02d", $mm,$ss);
# curr_time (sub header elided): current local time rendered with $t_fmt.
105 return strftime($t_fmt,localtime());
# hest_update (sub header elided): push newly-inserted file rows into the
# Hyper Estraier full-text index, paging through results EST_CHUNK rows at
# a time.  Called with no args it indexes everything pending; called with
# ($host_id, $share_id, $num) it restricts itself to that single backup.
112 my ($host_id, $share_id, $num) = @_;
# -j: skip the per-URI "already indexed?" lookup.  The print's true return
# value doubles as the flag value -- intentional, but a fragile idiom.
114 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
# Full-text support is optional; bail out quietly when not configured.
116 unless (defined($index_node_url)) {
117 print STDERR "HyperEstraier support not enabled in configuration\n";
122 print curr_time," updating Hyper Estraier:";
129 print " opening index $index_node_url";
130 if ($index_node_url) {
# lazily create the node connection once and reuse it across calls
131 $hest_node ||= Search::Estraier::Node->new(
132 url => $index_node_url,
137 print " via node URL";
139 die "don't know how to use Hyper Estraier Index $index_node_url";
# Narrow the query to a single backup when the full triplet was supplied.
148 if (defined($host_id) && defined($share_id) && defined($num)) {
155 @data = ( $host_id, $share_id, $num );
# Page through results so a huge file table does not exhaust memory.
158 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
160 my $sth = $dbh->prepare(qq{
164 shares.name AS sname,
165 -- shares.share AS sharename,
166 files.backupnum AS backupnum,
167 -- files.name AS filename,
168 files.path AS filepath,
172 files.shareid AS shareid,
173 backups.date AS backup_date
175 INNER JOIN shares ON files.shareID=shares.ID
176 INNER JOIN hosts ON hosts.ID = shares.hostID
177 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
182 $sth->execute(@data);
183 $results = $sth->rows;
186 print " - no new files\n";
# fmt_date helper: convert an epoch to BackupPC's timestamp format.
# NOTE(review): called as BackupPC::Lib::timeStamp($t) -- many BackupPC
# versions expect different arguments/method-style here; confirm it works.
193 my $t = shift || return;
194 my $iso = BackupPC::Lib::timeStamp($t);
199 while (my $row = $sth->fetchrow_hashref()) {
# URI uniquely identifies host:share#backupnum plus path in the index
201 my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
202 unless ($skip_check) {
203 my $id = $hest_node->uri_to_id($uri);
# NOTE(review): skips when uri_to_id returned -1, but in Hyper Estraier
# -1 conventionally means "not found" -- this condition may be inverted;
# verify against Search::Estraier's documented return values.
204 next if ($id && $id == -1);
207 # create a document object
208 my $doc = Search::Estraier::Document->new;
210 # add attributes to the document object
211 $doc->add_attr('@uri', $uri);
# copy every selected column as a document attribute (skip NULLs)
213 foreach my $c (@{ $sth->{NAME} }) {
214 print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
215 $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
218 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
220 # add the body text to the document object
221 my $path = $row->{'filepath'};
222 $doc->add_text($path);
# also index the path split into single characters so substring
# searches can match anywhere inside a file name
223 $path =~ s/(.)/$1 /g;
224 $doc->add_hidden_text($path);
226 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
228 # register the document object to the database
230 $hest_node->put_doc($doc);
239 $offset += EST_CHUNK;
# a short batch means the final page has been consumed
241 } while ($results == EST_CHUNK);
# guard against division by zero in the rate calculation below
243 my $dur = (time() - $t) || 1;
244 printf(" [%.2f/s dur: %s]\n",
# -i/-j without -c: only rebuild the full-text index from the already
# populated database instead of importing backups.
254 if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
256 print "force update of Hyper Estraier index ";
257 print "by -i flag" if ($opt{i});
258 print "by -j flag" if ($opt{j});
# Helper (header elided): create one database index from a
# "table:column(s)[:unique]" spec taken from the list in the schema code.
266 my $index = shift || return;
267 my ($table,$col,$unique) = split(/:/, $index);
# NOTE(review): $index is interpolated verbatim as the index name; the
# elided lines presumably sanitise it (":" and "," are not valid in most
# databases' identifiers) -- confirm.
270 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
271 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema creation (-c): the DDL below lives inside $dbh->do(qq{ ... })
# strings whose delimiters are elided from this listing -- the SQL text is
# runtime data, so no annotations are inserted inside it.
274 print "creating tables...\n";
278 ID SERIAL PRIMARY KEY,
279 name VARCHAR(30) NOT NULL,
283 create table shares (
284 ID SERIAL PRIMARY KEY,
285 hostID INTEGER NOT NULL references hosts(id),
286 name VARCHAR(30) NOT NULL,
287 share VARCHAR(200) NOT NULL
291 ID SERIAL PRIMARY KEY,
292 num INTEGER NOT NULL,
293 name VARCHAR(255) NOT NULL,
297 create table backups (
299 hostID INTEGER NOT NULL references hosts(id),
300 num INTEGER NOT NULL,
301 date integer NOT NULL,
302 type CHAR(4) not null,
303 shareID integer not null references shares(id),
304 size bigint not null,
305 inc_size bigint not null default -1,
306 inc_deleted boolean default false,
307 parts integer not null default 1,
313 shareID INTEGER NOT NULL references shares(id),
314 backupNum INTEGER NOT NULL,
315 name VARCHAR(255) NOT NULL,
316 path VARCHAR(255) NOT NULL,
317 date integer NOT NULL,
318 type INTEGER NOT NULL,
319 size bigint NOT NULL,
323 create table archive (
326 total_size bigint default -1,
328 username varchar(20) not null,
329 date timestamp default now(),
333 create table archive_backup (
334 archive_id int not null references archive(id) on delete cascade,
335 backup_id int not null references backups(id),
336 primary key(archive_id, backup_id)
339 create table archive_burned (
340 archive_id int references archive(id),
341 date timestamp default now(),
342 part int not null default 1,
343 copy int not null default 1,
344 iso_size bigint default -1
347 create table backup_parts (
349 backup_id int references backups(id),
350 part_nr int not null check (part_nr > 0),
351 tar_size bigint not null check (tar_size > 0),
352 size bigint not null check (size > 0),
354 items int not null check (items > 0),
355 date timestamp default now(),
360 print "creating indexes: ";
# each qw() entry is a table:column(s) spec handled by the index helper
362 foreach my $index (qw(
375 archive_burned:archive_id
376 backup_parts:backup_id,part_nr
381 print " creating sequence: ";
382 foreach my $seq (qw/dvd_nr/) {
# standalone sequences (currently just dvd_nr) created outside the tables
384 $dbh->do( qq{ CREATE SEQUENCE $seq } );
394 ## delete data before inserting ##
# -d: wipe existing rows child-tables-first so foreign keys stay satisfied
397 foreach my $table (qw(files dvds backups shares hosts)) {
399 $dbh->do(qq{ DELETE FROM $table });
406 ## insert new values ##
# load the BackupPC host list (hash keyed by host name)
409 $hosts = $bpc->HostInfoRead();
# prepared statements reused throughout the import loop below; SQL bodies
# are runtime strings whose closing delimiters are elided from this listing
415 $sth->{insert_hosts} = $dbh->prepare(qq{
416 INSERT INTO hosts (name, IP) VALUES (?,?)
419 $sth->{hosts_by_name} = $dbh->prepare(qq{
420 SELECT ID FROM hosts WHERE name=?
423 $sth->{backups_count} = $dbh->prepare(qq{
426 WHERE hostID=? AND num=? AND shareid=?
429 $sth->{insert_backups} = $dbh->prepare(qq{
430 INSERT INTO backups (hostID, num, date, type, shareid, size)
431 VALUES (?,?,?,?,?,-1)
434 $sth->{update_backups_size} = $dbh->prepare(qq{
435 UPDATE backups SET size = ?
436 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
439 $sth->{insert_files} = $dbh->prepare(qq{
441 (shareID, backupNum, name, path, date, type, size)
442 VALUES (?,?,?,?,?,?,?)
445 my @hosts = keys %{$hosts};
# Main import loop: for every host, walk each backup increment and each
# share, inserting backups/files that are not yet present in the database.
448 foreach my $host_key (@hosts) {
450 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# look the host up by name; insert it on first sight
452 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
454 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
455 $sth->{insert_hosts}->execute(
456 $hosts->{$host_key}->{'host'},
457 $hosts->{$host_key}->{'ip'}
460 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
464 # get backups for a host
465 my @backups = $bpc->BackupInfoRead($hostname);
466 my $incs = scalar @backups;
468 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
469 $hosts->{$host_key}->{'host'},
# with -q the header is held back and only printed when something changes
474 print $host_header unless ($opt{q});
479 foreach my $backup (@backups) {
# -m limits how many increments are imported per host
482 last if ($opt{m} && $inc_nr > $opt{m});
484 my $backupNum = $backup->{'num'};
485 my @backupShares = ();
487 my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
488 $hosts->{$host_key}->{'host'},
489 $inc_nr, $incs, $backupNum,
490 $backup->{type} || '?',
491 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
492 strftime($t_fmt,localtime($backup->{startTime})),
493 fmt_time($backup->{endTime} - $backup->{startTime})
495 print $share_header unless ($opt{q});
497 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
498 foreach my $share ($files->shareList($backupNum)) {
502 $shareID = getShareID($share, $hostID, $hostname);
# skip share/backup combinations imported by an earlier run
504 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
505 my ($count) = $sth->{backups_count}->fetchrow_array();
506 # skip if already in database!
507 next if ($count > 0);
509 # dump host and share header for -q
# print the deferred header once, then forget it
513 $host_header = undef;
519 print curr_time," ", $share;
# size is inserted as -1 (see VALUES above) and fixed up after recursion
521 $sth->{insert_backups}->execute(
524 $backup->{'endTime'},
525 substr($backup->{'type'},0,4),
529 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
532 $sth->{update_backups_size}->execute(
536 $backup->{'endTime'},
537 substr($backup->{'type'},0,4),
# guard against division by zero in the rate calculation
548 my $dur = (time() - $t) || 1;
549 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
551 ($size / 1024 / 1024),
# only poke the full-text index when this share produced new entries
556 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
565 print "total duration: ",fmt_time(time() - $start_t),"\n";
# getShareID (sub header elided): return the database id of the share
# named $share on host $hostID, inserting a new shares row on first sight.
571 my ($share, $hostID, $hostname) = @_;
# prepare lazily and cache the handle across calls
573 $sth->{share_id} ||= $dbh->prepare(qq{
574 SELECT ID FROM shares WHERE hostID=? AND name=?
577 $sth->{share_id}->execute($hostID,$share);
579 my ($id) = $sth->{share_id}->fetchrow_array();
581 return $id if (defined($id));
583 $sth->{insert_share} ||= $dbh->prepare(qq{
# "drop down" label: host/share with repeated slashes collapsed --
# presumably shown in the search UI's share selector; confirm.
589 my $drop_down = $hostname . '/' . $share;
590 $drop_down =~ s#//+#/#g;
592 $sth->{insert_share}->execute($hostID,$share, $drop_down);
593 return $dbh->last_insert_id(undef,undef,'shares',undef);
# found_in_db (sub header elided): report whether an identical file row is
# already stored.  A candidate matches on shareID/path/size with a date
# equal to the stored one or shifted by +/- $dst_offset, so the same file
# re-listed across a daylight-saving change is not treated as new.
# Side effects: records $key in the %$beenThere cache and inserts the row
# via insert_files when it was missing.
601 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
# memoization -- a key seen earlier in this run skips the database hit
603 return $beenThere->{$key} if (defined($beenThere->{$key}));
605 $sth->{file_in_db} ||= $dbh->prepare(qq{
607 WHERE shareID = ? and
610 ( date = ? or date = ? or date = ? )
614 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
615 $sth->{file_in_db}->execute(@param);
# NOTE(review): DBI documents rows() as unreliable for SELECT statements;
# this relies on driver behaviour (works with DBD::Pg, which the SERIAL/
# now() schema suggests) -- confirm for the configured SearchDSN.
616 my $rows = $sth->{file_in_db}->rows;
617 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
619 $beenThere->{$key}++;
# new file: insert it (@data is built from the arguments, elided above)
621 $sth->{'insert_files'}->execute(@data) unless ($rows);
625 ####################################################
626 # recurse through the filesystem structure and     #
627 # return a flattened list of files                 #
628 ####################################################
# NOTE(review): the ($$$$$$$$) prototype declares 8 scalars but only 7 are
# unpacked below; Perl prototypes do not validate recursive/&-style calls
# anyway -- consider dropping the prototype.
629 sub recurseDir($$$$$$$$) {
631 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
633 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# counters returned to the caller: total/new files, total/new dirs, bytes
635 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
640 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
641 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
643 # first, add all the entries in current directory
644 foreach my $path_key (keys %{$filesInBackup}) {
645 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
650 $filesInBackup->{$path_key}->{'relPath'},
651 $filesInBackup->{$path_key}->{'mtime'},
652 $filesInBackup->{$path_key}->{'type'},
653 $filesInBackup->{$path_key}->{'size'}
# dedup keys: exact mtime plus the +/- 1h variants, so a DST shift does
# not make the same file look new (mirrors found_in_db's date window)
656 my $key = join(" ", (
660 $filesInBackup->{$path_key}->{'mtime'},
661 $filesInBackup->{$path_key}->{'size'}
664 my $key_dst_prev = join(" ", (
668 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
669 $filesInBackup->{$path_key}->{'size'}
672 my $key_dst_next = join(" ", (
676 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
677 $filesInBackup->{$path_key}->{'size'}
# only consult the database when no key variant was seen in this run
682 ! defined($beenThere->{$key}) &&
683 ! defined($beenThere->{$key_dst_prev}) &&
684 ! defined($beenThere->{$key_dst_next}) &&
685 ! ($found = found_in_db($key, @data))
687 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
689 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
690 $new_dirs++ unless ($found);
691 print STDERR " dir\n" if ($debug >= 2);
693 $new_files++ unless ($found);
694 print STDERR " file\n" if ($debug >= 2);
696 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# subdirectories are collected on a stack and recursed after this loop
# finishes, instead of recursing immediately (see commented-out call below)
699 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
702 my $full_path = $dir . '/' . $path_key;
703 push @stack, $full_path;
704 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
706 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
718 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# now descend into each collected subdirectory and accumulate its counters
720 while ( my $dir = shift @stack ) {
721 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
722 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
731 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);