1 #!/usr/local/bin/perl -w
# Import BackupPC backup/file metadata into the search SQL database and,
# optionally, into a Hyper Estraier full-text index.
# NOTE(review): this listing is a sampled excerpt; several original lines
# (other "use" statements, closing braces) are not visible here.
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
# BackupPC attrib "type" value identifying a directory (used in recurseDir).
17 use constant BPC_FTYPE_DIR => 5;
# Batch size: rows fetched per query when feeding the full-text indexer.
18 use constant EST_CHUNK => 100000;
20 # daylight saving time change offset for 1h
21 my $dst_offset = 60 * 60;
# Build a per-install pidfile name from the absolute script path,
# collapsing non-word characters to underscores.
28 my $pid_path = abs_path($0);
29 $pid_path =~ s/\W+/_/g;
31 my $pidfile = new File::Pid({
32 file => "/tmp/$pid_path",
# Abort if another instance of this script already holds the pidfile.
35 if (my $pid = $pidfile->running ) {
36 die "$0 already running: $pid\n";
37 } elsif ($pidfile->pid ne $$) {
# Pidfile exists but records a different pid: re-create with defaults.
39 $pidfile = new File::Pid;
41 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# strftime pattern used for all timestamps this script prints.
44 my $t_fmt = '%Y-%m-%d %H:%M:%S';
47 my $bpc = BackupPC::Lib->new || die;
48 my %Conf = $bpc->Conf();
49 my $TopDir = $bpc->TopDir();
# Search database DSN/user and full-text node URL come from config.pl;
# SearchDSN is mandatory, the rest are optional.
52 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
53 my $user = $Conf{SearchUser} || '';
55 my $index_node_url = $Conf{HyperEstraierIndex};
# RaiseError: DB errors die. AutoCommit off -- NOTE(review): explicit
# commit() calls are not visible in this excerpt; verify they exist.
57 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line switches ("m:" and "v:" take values). On failure the
# usage text below is printed; it is quoted program output, so no comments
# are inserted among those lines.
61 if ( !getopts("cdm:v:ijfq", \%opt ) ) {
63 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
66 -c create database on first use
67 -d delete database before import
68 -m num import just num increments for one host
69 -v num set verbosity (debug) level (default $debug)
70 -i update Hyper Estraier full text index
71 -j update full text, don't check existing files
72 -f don't do anything with full text index
73 -q be quiet for hosts without changes
75 Option -j is variation on -i. It will allow faster initial creation
76 of full-text index from existing database.
78 Option -f will create database which is out of sync with full text index. You
79 will have to re-run $0 with -i to fix it.
# Report chosen verbosity (presumably guarded by a -v check not visible here).
86 print "Debug level at $opt{v}\n";
# -f: clear the index URL so no full-text updates happen this run.
89 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
90 $index_node_url = undef;
# fmt_time fragment (sub header not visible): renders a duration in
# seconds as "[Hh]MM:SS"; gmtime is used so no timezone offset applies.
96 my $t = shift || return;
98 my ($ss,$mm,$hh) = gmtime($t);
99 $out .= "${hh}h" if ($hh);
100 $out .= sprintf("%02d:%02d", $mm,$ss);
# curr_time fragment: current local wall-clock time formatted with $t_fmt.
105 return strftime($t_fmt,localtime());
# hest_update fragment (sub header not visible): incrementally add new
# file rows to the Hyper Estraier full-text index, EST_CHUNK rows per
# query. When ($host_id, $share_id, $num) are all given, only that
# backup's files are considered.
112 my ($host_id, $share_id, $num) = @_;
# Quirk: "$opt{j} && print ..." sets $skip_check to print's return value
# (true) only when -j was given, emitting the warning as a side effect.
114 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
116 unless (defined($index_node_url)) {
117 print STDERR "HyperEstraier support not enabled in configuration\n";
122 print curr_time," updating Hyper Estraier:";
129 print " opening index $index_node_url";
# Lazily create a single node connection, reused across calls.
130 if ($index_node_url) {
131 $hest_node ||= Search::Estraier::Node->new(
132 url => $index_node_url,
137 print " via node URL";
139 die "don't know how to use Hyper Estraier Index $index_node_url";
# Bind query parameters only when a specific backup was requested.
148 if (defined($host_id) && defined($share_id) && defined($num)) {
155 @data = ( $host_id, $share_id, $num );
# Page through candidate rows; the surrounding do/while repeats while a
# full chunk was returned (see "} while ($results == EST_CHUNK)" below).
158 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
160 my $sth = $dbh->prepare(qq{
164 shares.name AS sname,
165 -- shares.share AS sharename,
166 files.backupnum AS backupnum,
167 -- files.name AS filename,
168 files.path AS filepath,
172 files.shareid AS shareid,
173 backups.date AS backup_date
175 INNER JOIN shares ON files.shareID=shares.ID
176 INNER JOIN hosts ON hosts.ID = shares.hostID
177 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
182 $sth->execute(@data);
183 $results = $sth->rows;
186 print " - no new files\n";
189 print " - $results files: ";
# fmt_date fragment: epoch seconds -> BackupPC timestamp string.
193 my $t = shift || return;
194 my $iso = BackupPC::Lib::timeStamp($t);
199 while (my $row = $sth->fetchrow_hashref()) {
201 my $fid = $row->{'fid'} || die "no fid?";
# Each file is indexed under a synthetic file:/// URI keyed by its fid.
202 my $uri = 'file:///' . $fid;
# Unless -j was given, skip documents already present in the index.
# NOTE(review): uri_to_id commonly returns -1 for "not found"; this
# "next if ($id && $id == -1)" would then skip *missing* documents --
# verify against the Search::Estraier version in use.
204 unless ($skip_check) {
205 my $id = $hest_node->uri_to_id($uri);
206 next if ($id && $id == -1);
209 # create a document object
210 my $doc = Search::Estraier::Document->new;
212 # add attributes to the document object
213 $doc->add_attr('@uri', $uri);
# Copy every selected column into a document attribute of the same name.
215 foreach my $c (@{ $sth->{NAME} }) {
216 print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
217 $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
220 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
222 # add the body text to the document object
223 my $path = $row->{'filepath'};
224 $doc->add_text($path);
# Space out every character so substring searches can match path parts.
225 $path =~ s/(.)/$1 /g;
226 $doc->add_hidden_text($path);
228 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
230 # register the document object to the database
232 $hest_node->put_doc($doc);
241 $offset += EST_CHUNK;
243 } while ($results == EST_CHUNK);
# Report throughput; $dur is floored to 1s to avoid division by zero.
245 my $dur = (time() - $t) || 1;
246 printf(" [%.2f/s dur: %s]\n",
# When -i or -j is given without -c, force a full-text index refresh up
# front (with both flags the two messages print back-to-back).
256 if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
258 print "force update of Hyper Estraier index ";
259 print "by -i flag" if ($opt{i});
260 print "by -j flag" if ($opt{j});
# do_index fragment (sub header not visible): create one index from a
# "table:col[,col...][:unique]" spec -- format inferred from the split
# below and the qw() list later in the file; confirm.
268 my $index = shift || return;
269 my ($table,$col,$unique) = split(/:/, $index);
# "u" marker printed when the spec's third field requests a unique index.
272 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
# NOTE(review): $index still contains ':' / ',' here, which is not a
# valid SQL identifier -- a name transformation likely happens on lines
# not visible in this excerpt; verify before relying on this statement.
273 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema bootstrap (presumably guarded by -c handling not visible in this
# excerpt). Table bodies below are SQL string content, so no comments are
# inserted among them.
276 print "creating tables...\n";
280 ID SERIAL PRIMARY KEY,
281 name VARCHAR(30) NOT NULL,
285 create table shares (
286 ID SERIAL PRIMARY KEY,
287 hostID INTEGER NOT NULL references hosts(id),
288 name VARCHAR(30) NOT NULL,
289 share VARCHAR(200) NOT NULL
293 ID SERIAL PRIMARY KEY,
294 num INTEGER NOT NULL,
295 name VARCHAR(255) NOT NULL,
299 create table backups (
301 hostID INTEGER NOT NULL references hosts(id),
302 num INTEGER NOT NULL,
303 date integer NOT NULL,
304 type CHAR(4) not null,
305 shareID integer not null references shares(id),
306 size bigint not null,
307 inc_size bigint not null default -1,
308 inc_deleted boolean default false,
309 parts integer not null default 1,
315 shareID INTEGER NOT NULL references shares(id),
316 backupNum INTEGER NOT NULL,
317 name VARCHAR(255) NOT NULL,
318 path VARCHAR(255) NOT NULL,
319 date integer NOT NULL,
320 type INTEGER NOT NULL,
321 size bigint NOT NULL,
325 create table archive (
328 total_size bigint default -1,
330 username varchar(20) not null,
331 date timestamp default now(),
335 create table archive_backup (
336 archive_id int not null references archive(id) on delete cascade,
337 backup_id int not null references backups(id),
338 primary key(archive_id, backup_id)
341 create table archive_burned (
342 archive_id int references archive(id),
343 date timestamp default now(),
344 part int not null default 1,
345 copy int not null default 1,
346 iso_size bigint default -1
349 create table backup_parts (
351 backup_id int references backups(id),
352 part_nr int not null check (part_nr > 0),
353 tar_size bigint not null check (tar_size > 0),
354 size bigint not null check (size > 0),
356 items int not null check (items > 0),
357 date timestamp default now(),
# Index specs use the "table:column[,column][:unique]" format consumed
# by do_index; the qw() list is left untouched (comments inside qw()
# would become literal list elements).
362 print "creating indexes: ";
364 foreach my $index (qw(
377 archive_burned:archive_id
378 backup_parts:backup_id,part_nr
383 print " creating sequence: ";
384 foreach my $seq (qw/dvd_nr/) {
386 $dbh->do( qq{ CREATE SEQUENCE $seq } );
396 ## delete data before inserting ##
# -d: wipe all imported data, children-first so FK constraints hold.
399 foreach my $table (qw(files dvds backups shares hosts)) {
401 $dbh->do(qq{ DELETE FROM $table });
408 ## insert new values ##
# Host list comes from BackupPC's own config, not the database.
411 $hosts = $bpc->HostInfoRead();
# Prepared statements reused for every host/backup/share below.
417 $sth->{insert_hosts} = $dbh->prepare(qq{
418 INSERT INTO hosts (name, IP) VALUES (?,?)
421 $sth->{hosts_by_name} = $dbh->prepare(qq{
422 SELECT ID FROM hosts WHERE name=?
425 $sth->{backups_count} = $dbh->prepare(qq{
428 WHERE hostID=? AND num=? AND shareid=?
431 $sth->{insert_backups} = $dbh->prepare(qq{
432 INSERT INTO backups (hostID, num, date, type, shareid, size)
433 VALUES (?,?,?,?,?,-1)
436 $sth->{update_backups_size} = $dbh->prepare(qq{
437 UPDATE backups SET size = ?
438 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
441 $sth->{insert_files} = $dbh->prepare(qq{
443 (shareID, backupNum, name, path, date, type, size)
444 VALUES (?,?,?,?,?,?,?)
447 my @hosts = keys %{$hosts};
450 foreach my $host_key (@hosts) {
452 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# Resolve (or create) the hosts row and remember its ID.
454 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
456 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
457 $sth->{insert_hosts}->execute(
458 $hosts->{$host_key}->{'host'},
459 $hosts->{$host_key}->{'ip'}
462 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
466 # get backups for a host
467 my @backups = $bpc->BackupInfoRead($hostname);
468 my $incs = scalar @backups;
# Header is deferred when -q: printed later only if something changes.
470 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
471 $hosts->{$host_key}->{'host'},
476 print $host_header unless ($opt{q});
481 foreach my $backup (@backups) {
# -m N: stop after importing N increments for this host.
484 last if ($opt{m} && $inc_nr > $opt{m});
486 my $backupNum = $backup->{'num'};
487 my @backupShares = ();
489 my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
490 $hosts->{$host_key}->{'host'},
491 $inc_nr, $incs, $backupNum,
492 $backup->{type} || '?',
493 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
494 strftime($t_fmt,localtime($backup->{startTime})),
495 fmt_time($backup->{endTime} - $backup->{startTime})
497 print $share_header unless ($opt{q});
# A View gives per-share directory listings for this backup.
499 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
500 foreach my $share ($files->shareList($backupNum)) {
504 $shareID = getShareID($share, $hostID, $hostname);
506 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
507 my ($count) = $sth->{backups_count}->fetchrow_array();
508 # skip if already in database!
509 next if ($count > 0);
511 # dump host and share header for -q
515 $host_header = undef;
521 print curr_time," ", $share;
# Insert the backup row with size=-1, then walk the share's file tree
# and patch the real size in afterwards.
523 $sth->{insert_backups}->execute(
526 $backup->{'endTime'},
527 substr($backup->{'type'},0,4),
531 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
534 $sth->{update_backups_size}->execute(
538 $backup->{'endTime'},
539 substr($backup->{'type'},0,4),
550 my $dur = (time() - $t) || 1;
551 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
553 ($size / 1024 / 1024),
# Update the full-text index only when this share actually added rows.
558 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
567 print "total duration: ",fmt_time(time() - $start_t),"\n";
# getShareID fragment (sub header not visible): return the shares.ID for
# ($hostID, $share), creating the row on first sight.
573 my ($share, $hostID, $hostname) = @_;
575 $sth->{share_id} ||= $dbh->prepare(qq{
576 SELECT ID FROM shares WHERE hostID=? AND name=?
579 $sth->{share_id}->execute($hostID,$share);
581 my ($id) = $sth->{share_id}->fetchrow_array();
# Fast path: share already known.
583 return $id if (defined($id));
585 $sth->{insert_share} ||= $dbh->prepare(qq{
# UI label "host/share" with runs of slashes collapsed to one.
# NOTE(review): the INSERT's column list is not visible in this excerpt.
591 my $drop_down = $hostname . '/' . $share;
592 $drop_down =~ s#//+#/#g;
594 $sth->{insert_share}->execute($hostID,$share, $drop_down);
595 return $dbh->last_insert_id(undef,undef,'shares',undef);
# found_in_db fragment (sub header not visible): decide whether a file
# row already exists, memoized per $key via %$beenThere. A file counts
# as found when a row matches share, path, size and a date equal to the
# mtime or exactly one DST hour before/after it (avoids duplicates when
# a daylight-saving change shifts timestamps by 3600s).
603 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
605 return $beenThere->{$key} if (defined($beenThere->{$key}));
607 $sth->{file_in_db} ||= $dbh->prepare(qq{
609 WHERE shareID = ? and
612 ( date = ? or date = ? or date = ? )
616 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
617 $sth->{file_in_db}->execute(@param);
618 my $rows = $sth->{file_in_db}->rows;
619 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
621 $beenThere->{$key}++;
# Insert only when no matching row exists.
# NOTE(review): @data is populated on lines not visible in this excerpt.
623 $sth->{'insert_files'}->execute(@data) unless ($rows);
627 ####################################################
628 # recursing through filesystem structure and #
629 # and returning flattened files list #
630 ####################################################
# NOTE(review): the ($$$$$$$$) prototype declares eight scalar args but
# only seven are unpacked below; Perl prototypes do no runtime argument
# checking and are best removed (see perlsub).
631 sub recurseDir($$$$$$$$) {
633 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
635 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# Counters returned to the caller: total/new files, total/new dirs, bytes.
637 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
642 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
643 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
645 # first, add all the entries in current directory
646 foreach my $path_key (keys %{$filesInBackup}) {
647 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
652 $filesInBackup->{$path_key}->{'relPath'},
653 $filesInBackup->{$path_key}->{'mtime'},
654 $filesInBackup->{$path_key}->{'type'},
655 $filesInBackup->{$path_key}->{'size'}
# Three dedupe keys: exact mtime plus one DST hour before/after, so
# entries are not re-imported when a DST change shifts timestamps.
658 my $key = join(" ", (
662 $filesInBackup->{$path_key}->{'mtime'},
663 $filesInBackup->{$path_key}->{'size'}
666 my $key_dst_prev = join(" ", (
670 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
671 $filesInBackup->{$path_key}->{'size'}
674 my $key_dst_next = join(" ", (
678 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
679 $filesInBackup->{$path_key}->{'size'}
# Hit the database only when no variant of the key was seen in-process;
# found_in_db also inserts the row when it is missing.
684 ! defined($beenThere->{$key}) &&
685 ! defined($beenThere->{$key_dst_prev}) &&
686 ! defined($beenThere->{$key_dst_next}) &&
687 ! ($found = found_in_db($key, @data))
689 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
# Count the entry as a new dir/file unless it was already in the DB.
691 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
692 $new_dirs++ unless ($found);
693 print STDERR " dir\n" if ($debug >= 2);
695 $new_files++ unless ($found);
696 print STDERR " file\n" if ($debug >= 2);
698 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Subdirectories are collected on a stack and recursed only after the
# current directory's entries have all been processed.
701 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
704 my $full_path = $dir . '/' . $path_key;
705 push @stack, $full_path;
706 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
708 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
720 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Recurse into each collected subdirectory; the statements accumulating
# ($f,$nf,$d,$nd,$s) into the counters are not visible in this excerpt.
722 while ( my $dir = shift @stack ) {
723 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
724 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
733 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);