1 #!/usr/local/bin/perl -w
# NOTE(review): this is a sparse, line-numbered extract of a larger BackupPC
# search-database update script; original numbering is embedded in each line
# and many lines are missing between those shown.
4 use lib "__INSTALLDIR__/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
# BackupPC file-type constant: 5 marks a directory entry (compared against
# each entry's 'type' attribute while recursing shares).
17 use constant BPC_FTYPE_DIR => 5;
# Number of rows fetched per page when feeding the full-text indexer.
18 use constant EST_CHUNK => 100000;
20 # daylight saving time change offset for 1h
21 my $dst_offset = 60 * 60;
# Derive a pidfile name from the absolute script path with all non-word
# characters collapsed to '_', so each installed copy gets its own lock.
28 my $pid_path = abs_path($0);
29 $pid_path =~ s/\W+/_/g;
31 my $pidfile = new File::Pid({
32 file => "/tmp/$pid_path",
# Refuse to start when another instance already holds the pidfile.
35 if (my $pid = $pidfile->running ) {
36 die "$0 already running: $pid\n";
37 } elsif ($pidfile->pid ne $$) {
# Pidfile exists but records a different pid: fall back to a fresh
# File::Pid object. NOTE(review): indirect-object syntax (new File::Pid)
# is discouraged; prefer File::Pid->new.
39 $pidfile = new File::Pid;
41 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# Timestamp format shared by curr_time() and per-backup progress lines.
44 my $t_fmt = '%Y-%m-%d %H:%M:%S';
47 my $bpc = BackupPC::Lib->new || die;
48 my %Conf = $bpc->Conf();
49 my $TopDir = $bpc->TopDir();
# The search database DSN is mandatory; the DB user defaults to ''.
52 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
53 my $user = $Conf{SearchUser} || '';
# Optional Hyper Estraier node URL; undef disables full-text updates.
55 my $index_node_url = $Conf{HyperEstraierIndex};
# AutoCommit=0: inserts are batched and committed explicitly elsewhere.
57 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line options; on failure the usage text below is emitted.
# The lines between the getopts() call and the option handling (including
# the string delimiters around the usage text) are not in this extract, so
# no comments are inserted inside that region.
61 if ( !getopts("cdm:v:ijf", \%opt ) ) {
63 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
66 -c create database on first use
67 -d delete database before import
68 -m num import just num increments for one host
69 -v num set verbosity (debug) level (default $debug)
70 -i update Hyper Estraier full text index
71 -j update full text, don't check existing files
72 -f don't do anything with full text index
74 Option -j is variation on -i. It will allow faster initial creation
75 of full-text index from existing database.
77 Option -f will create database which is out of sync with full text index. You
78 will have to re-run $0 with -i to fix it.
85 print "Debug level at $opt{v}\n";
88 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
89 $index_node_url = undef;
# Body of a duration formatter (its `sub fmt_time {` header is not in this
# extract): renders a number of seconds as [Hh]MM:SS. gmtime is used so the
# value is treated as an elapsed interval, not a wall-clock time.
95 my $t = shift || return;
97 my ($ss,$mm,$hh) = gmtime($t);
98 $out .= "${hh}h" if ($hh);
99 $out .= sprintf("%02d:%02d", $mm,$ss);
# Body of curr_time (header not shown): current local time in $t_fmt.
104 return strftime($t_fmt,localtime());
# Fragment of hest_update(): push newly inserted file rows for one
# (host, share, backup) -- or everything, when called without arguments --
# into the Hyper Estraier full-text index, in EST_CHUNK-sized pages.
111 my ($host_id, $share_id, $num) = @_;
# -j sets $skip_check via the "&& print" idiom (print returns 1), which
# skips the per-URI "already indexed?" lookup on initial bulk imports.
113 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
115 unless (defined($index_node_url)) {
116 print STDERR "HyperEstraier support not enabled in configuration\n";
121 print curr_time," updating Hyper Estraier:";
128 print " opening index $index_node_url";
129 if ($index_node_url) {
# Lazily create and cache the node connection.
130 $hest_node ||= Search::Estraier::Node->new(
131 url => $index_node_url,
136 print " via node URL";
138 die "don't know how to use Hyper Estraier Index $index_node_url";
# When all three ids are given, restrict the query to that one backup;
# otherwise index everything (branch bodies partly outside this extract).
147 if (defined($host_id) && defined($share_id) && defined($num)) {
154 @data = ( $host_id, $share_id, $num );
# Page through the result set; OFFSET advances by EST_CHUNK per pass.
157 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
159 my $sth = $dbh->prepare(qq{
163 shares.name AS sname,
164 -- shares.share AS sharename,
165 files.backupnum AS backupnum,
166 -- files.name AS filename,
167 files.path AS filepath,
171 files.shareid AS shareid,
172 backups.date AS backup_date
174 INNER JOIN shares ON files.shareID=shares.ID
175 INNER JOIN hosts ON hosts.ID = shares.hostID
176 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
181 $sth->execute(@data);
182 $results = $sth->rows;
185 print " - no new files\n";
188 print " - $results files: ";
192 my $t = shift || return;
193 my $iso = BackupPC::Lib::timeStamp($t);
198 while (my $row = $sth->fetchrow_hashref()) {
# Document URI is synthesized from the numeric file id.
200 my $fid = $row->{'fid'} || die "no fid?";
201 my $uri = 'file:///' . $fid;
203 unless ($skip_check) {
204 my $id = $hest_node->uri_to_id($uri);
# NOTE(review): Search::Estraier::Node::uri_to_id returns -1 when the
# URI is NOT in the index, so skipping on "== -1" looks inverted -- it
# would skip exactly the unindexed documents. The apparent intent is to
# skip documents that already exist (id defined and != -1). Confirm
# against the Search::Estraier documentation before changing.
205 next if ($id && $id == -1);
208 # create a document object
209 my $doc = Search::Estraier::Document->new;
211 # add attributes to the document object
212 $doc->add_attr('@uri', $uri);
# Mirror every selected column into a document attribute.
214 foreach my $c (@{ $sth->{NAME} }) {
215 print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
216 $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
219 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
221 # add the body text to the document object
222 my $path = $row->{'filepath'};
223 $doc->add_text($path);
# Space out individual characters, presumably so partial-path searches
# can match via the hidden text -- verify against the search frontend.
224 $path =~ s/(.)/$1 /g;
225 $doc->add_hidden_text($path);
227 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
229 # register the document object to the database
231 $hest_node->put_doc($doc);
240 $offset += EST_CHUNK;
# Keep paging while a full chunk was returned.
242 } while ($results == EST_CHUNK);
244 my $dur = (time() - $t) || 1;
245 printf(" [%.2f/s dur: %s]\n",
# When -i or -j is given without -c, force a full-text index update pass
# (the actual update call is on lines not shown in this extract).
255 if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
257 print "force update of Hyper Estraier index ";
258 print "by -i flag" if ($opt{i});
259 print "by -j flag" if ($opt{j});
# Fragment of an index-creation helper: takes a spec of the form
# "table:column[:unique]" and issues CREATE [UNIQUE] INDEX. The column
# part may itself contain commas for multi-column indexes.
267 my $index = shift || return;
268 my ($table,$col,$unique) = split(/:/, $index);
# A trailing "u" in the progress output marks a unique index.
271 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
# NOTE(review): $index is interpolated as the SQL index name but still
# contains ':' (and possibly ',') at this point; presumably it is
# sanitized on lines not shown -- confirm, since those characters are not
# valid in an unquoted SQL identifier.
272 $dbh->do(qq{ create $unique index $index on $table($col) });
# Create the search-database schema (hosts, shares, backups, files,
# archive*, backup_parts, ...). The CREATE TABLE text below is string
# content inside $dbh->do(qq{ ... }) calls whose delimiters fall on lines
# not present in this extract, so no comments are inserted inside it.
275 print "creating tables...\n";
279 ID SERIAL PRIMARY KEY,
280 name VARCHAR(30) NOT NULL,
284 create table shares (
285 ID SERIAL PRIMARY KEY,
286 hostID INTEGER NOT NULL references hosts(id),
287 name VARCHAR(30) NOT NULL,
288 share VARCHAR(200) NOT NULL
292 ID SERIAL PRIMARY KEY,
293 num INTEGER NOT NULL,
294 name VARCHAR(255) NOT NULL,
298 create table backups (
300 hostID INTEGER NOT NULL references hosts(id),
301 num INTEGER NOT NULL,
302 date integer NOT NULL,
303 type CHAR(4) not null,
304 shareID integer not null references shares(id),
305 size bigint not null,
306 inc_size bigint not null default -1,
307 inc_deleted boolean default false,
308 parts integer not null default 1,
314 shareID INTEGER NOT NULL references shares(id),
315 backupNum INTEGER NOT NULL,
316 name VARCHAR(255) NOT NULL,
317 path VARCHAR(255) NOT NULL,
318 date integer NOT NULL,
319 type INTEGER NOT NULL,
320 size bigint NOT NULL,
324 create table archive (
327 total_size bigint default -1,
329 username varchar(20) not null,
330 date timestamp default now(),
334 create table archive_backup (
335 archive_id int not null references archive(id) on delete cascade,
336 backup_id int not null references backups(id),
337 primary key(archive_id, backup_id)
340 create table archive_burned (
341 archive_id int references archive(id),
342 date timestamp default now(),
343 part int not null default 1,
344 copy int not null default 1,
345 iso_size bigint default -1
348 create table backup_parts (
350 backup_id int references backups(id),
351 part_nr int not null check (part_nr > 0),
352 tar_size bigint not null check (tar_size > 0),
353 size bigint not null check (size > 0),
355 items int not null check (items > 0),
356 date timestamp default now(),
361 print "creating indexes: ";
# Index specs in table:column[:unique] form, consumed by the index helper.
363 foreach my $index (qw(
376 archive_burned:archive_id
377 backup_parts:backup_id,part_nr
382 print " creating sequence: ";
# Sequence presumably used for assigning DVD numbers to archives; the
# consuming code is not in this extract.
383 foreach my $seq (qw/dvd_nr/) {
385 $dbh->do( qq{ CREATE SEQUENCE $seq } );
395 ## delete data before inserting ##
# Child tables first (files) and parent tables last (hosts), so foreign
# key constraints are not violated during the purge.
398 foreach my $table (qw(files dvds backups shares hosts)) {
400 $dbh->do(qq{ DELETE FROM $table });
407 ## insert new values ##
# Host list comes from BackupPC's own configuration.
410 $hosts = $bpc->HostInfoRead();
# Prepared statements reused across all hosts/backups/shares. The SQL text
# lives inside qq{ } strings whose closing delimiters are on lines not
# shown, so no comments are inserted between the prepare blocks.
416 $sth->{insert_hosts} = $dbh->prepare(qq{
417 INSERT INTO hosts (name, IP) VALUES (?,?)
420 $sth->{hosts_by_name} = $dbh->prepare(qq{
421 SELECT ID FROM hosts WHERE name=?
424 $sth->{backups_count} = $dbh->prepare(qq{
427 WHERE hostID=? AND num=? AND shareid=?
430 $sth->{insert_backups} = $dbh->prepare(qq{
431 INSERT INTO backups (hostID, num, date, type, shareid, size)
432 VALUES (?,?,?,?,?,-1)
435 $sth->{update_backups_size} = $dbh->prepare(qq{
436 UPDATE backups SET size = ?
437 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
440 $sth->{insert_files} = $dbh->prepare(qq{
442 (shareID, backupNum, name, path, date, type, size)
443 VALUES (?,?,?,?,?,?,?)
446 my @hosts = keys %{$hosts};
449 foreach my $host_key (@hosts) {
451 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# Look up the host row, inserting it on first sight.
453 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
455 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
456 $sth->{insert_hosts}->execute(
457 $hosts->{$host_key}->{'host'},
458 $hosts->{$host_key}->{'ip'}
461 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
465 print "host ", $hosts->{$host_key}->{'host'}, " [",
466 $host_nr, "/", ($#hosts + 1), "]: ";
468 # get backups for a host
469 my @backups = $bpc->BackupInfoRead($hostname);
470 my $incs = scalar @backups;
471 print "$incs increments\n";
476 foreach my $backup (@backups) {
# -m limits how many increments are imported per host.
479 last if ($opt{m} && $inc_nr > $opt{m});
481 my $backupNum = $backup->{'num'};
482 my @backupShares = ();
484 printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
485 $hosts->{$host_key}->{'host'},
486 $inc_nr, $incs, $backupNum,
487 $backup->{type} || '?',
488 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
489 strftime($t_fmt,localtime($backup->{startTime})),
490 fmt_time($backup->{endTime} - $backup->{startTime})
493 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
494 foreach my $share ($files->shareList($backupNum)) {
498 $shareID = getShareID($share, $hostID, $hostname);
# Skip (host, backup, share) combinations already in the database.
500 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
501 my ($count) = $sth->{backups_count}->fetchrow_array();
502 # skip if already in database!
503 next if ($count > 0);
506 print curr_time," ", $share;
# Insert the backup row with the placeholder size -1 (see the prepared
# INSERT above); the real size is filled in by update_backups_size
# after the share has been walked.
508 $sth->{insert_backups}->execute(
511 $backup->{'endTime'},
512 substr($backup->{'type'},0,4),
# Walk the share and insert its files; returns file/dir counts and size.
516 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
519 $sth->{update_backups_size}->execute(
523 $backup->{'endTime'},
524 substr($backup->{'type'},0,4),
535 my $dur = (time() - $t) || 1;
536 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
538 ($size / 1024 / 1024),
# Refresh the full-text index only when this share contributed new files
# or directories.
543 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
552 print "total duration: ",fmt_time(time() - $start_t),"\n";
# Fragment of getShareID(): return shares.ID for (hostID, share name),
# creating the row on first sight (its `sub` header is not in this extract).
558 my ($share, $hostID, $hostname) = @_;
560 $sth->{share_id} ||= $dbh->prepare(qq{
561 SELECT ID FROM shares WHERE hostID=? AND name=?
564 $sth->{share_id}->execute($hostID,$share);
566 my ($id) = $sth->{share_id}->fetchrow_array();
# Found an existing share row: done.
568 return $id if (defined($id));
# Not found: insert a new share row. The human-readable label is
# "hostname/share" with duplicate slashes collapsed; the INSERT column
# list is on lines not shown in this extract.
570 $sth->{insert_share} ||= $dbh->prepare(qq{
576 my $drop_down = $hostname . '/' . $share;
577 $drop_down =~ s#//+#/#g;
579 $sth->{insert_share}->execute($hostID,$share, $drop_down);
580 return $dbh->last_insert_id(undef,undef,'shares',undef);
# Fragment of found_in_db(): memoized check whether a file row already
# exists, tolerating a +/-1h mtime shift caused by DST changes; inserts
# the file when no matching row is found.
588 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
# Cache hit: this exact key was already decided during this run.
590 return $beenThere->{$key} if (defined($beenThere->{$key}));
# Match on share/path/size with the date equal to the exact value or
# either DST-shifted variant (three placeholders below).
592 $sth->{file_in_db} ||= $dbh->prepare(qq{
594 WHERE shareID = ? and
597 ( date = ? or date = ? or date = ? )
601 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
602 $sth->{file_in_db}->execute(@param);
603 my $rows = $sth->{file_in_db}->rows;
604 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
# Remember the decision, then insert only when no matching row exists.
# NOTE(review): @data is populated on lines not shown in this extract --
# confirm it matches the insert_files placeholder order.
606 $beenThere->{$key}++;
608 $sth->{'insert_files'}->execute(@data) unless ($rows);
612 ####################################################
613 # recursing through filesystem structure and       #
614 # returning flattened files list                   #
615 ####################################################
616 sub recurseDir($$$$$$$$) {
# Walk one directory of a share inside a backup, insert entries not yet in
# the database, and return the totals:
#   ($nr_files, $new_files, $nr_dirs, $new_dirs, $size)
# Subdirectories are queued on a local @stack and recursed into after the
# current directory's entries have been processed.
# NOTE(review): the prototype declares eight scalar arguments but only
# seven are unpacked below; Perl prototypes do not validate call sites and
# are generally best avoided -- confirm before removing it.
618 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
620 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
# Running totals for this directory and everything below it.
622 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
627 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
628 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
630 # first, add all the entries in current directory
631 foreach my $path_key (keys %{$filesInBackup}) {
632 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
637 $filesInBackup->{$path_key}->{'relPath'},
638 $filesInBackup->{$path_key}->{'mtime'},
639 $filesInBackup->{$path_key}->{'type'},
640 $filesInBackup->{$path_key}->{'size'}
# Dedup keys: the exact mtime plus both DST-shifted variants, so a file
# whose timestamp moved by one hour across a DST change is not counted
# or inserted twice.
643 my $key = join(" ", (
647 $filesInBackup->{$path_key}->{'mtime'},
648 $filesInBackup->{$path_key}->{'size'}
651 my $key_dst_prev = join(" ", (
655 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
656 $filesInBackup->{$path_key}->{'size'}
659 my $key_dst_next = join(" ", (
663 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
664 $filesInBackup->{$path_key}->{'size'}
669 ! defined($beenThere->{$key}) &&
670 ! defined($beenThere->{$key_dst_prev}) &&
671 ! defined($beenThere->{$key_dst_next}) &&
672 ! ($found = found_in_db($key, @data))
674 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
# Count directories and files separately; $found is set by found_in_db
# above when the row already existed.
676 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
677 $new_dirs++ unless ($found);
678 print STDERR " dir\n" if ($debug >= 2);
680 $new_files++ unless ($found);
681 print STDERR " file\n" if ($debug >= 2);
683 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Directories are deferred: remember the full path and recurse later, so
# the current directory's entries are all inserted first.
686 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
689 my $full_path = $dir . '/' . $path_key;
690 push @stack, $full_path;
691 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
693 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
705 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Process queued subdirectories; accumulation of the returned counters
# into the running totals happens on lines not shown in this extract.
707 while ( my $dir = shift @stack ) {
708 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
709 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
718 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);