4 use lib "/usr/local/BackupPC/lib";
11 use Time::HiRes qw/time/;
13 use POSIX qw/strftime/;
14 use BackupPC::SearchLib;
16 use Data::Dump qw(dump);
18 use constant BPC_FTYPE_DIR => 5;
19 use constant EST_CHUNK => 4096;
21 # daylight saving time change offset for 1h
22 my $dst_offset = 60 * 60;
# Build a per-script pidfile name from the absolute path of this script,
# with all non-word characters collapsed to underscores so it is a safe
# single filename component under /tmp.
29 my $pid_path = abs_path($0);
30 $pid_path =~ s/\W+/_/g;
# NOTE(review): indirect-object syntax `new File::Pid` should be
# File::Pid->new(...) — flagged only, code left untouched.
32 my $pidfile = new File::Pid({
33 file => "/tmp/$pid_path",
# Refuse to start a second concurrent import; if a stale pidfile belongs
# to another (dead) process, re-create it for this process.
36 if (my $pid = $pidfile->running ) {
37 die "$0 already running: $pid\n";
38 } elsif ($pidfile->pid ne $$) {
40 $pidfile = new File::Pid;
42 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
# Timestamp format used for all printed dates (strftime pattern).
45 my $t_fmt = '%Y-%m-%d %H:%M:%S';
# Load the BackupPC configuration; SearchDSN is mandatory (DBI DSN for the
# search database), SearchUser and the Hyper Estraier index URL are optional.
48 my $bpc = BackupPC::Lib->new || die;
49 my %Conf = $bpc->Conf();
50 my $TopDir = $bpc->TopDir();
53 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
54 my $user = $Conf{SearchUser} || '';
56 my $index_node_url = $Conf{HyperEstraierIndex};
# AutoCommit is off: inserts are batched and committed explicitly elsewhere.
58 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
# Parse command-line switches; on failure print usage and bail out.
# NOTE(review): the usage text below appears to be the body of a heredoc
# whose opening and terminator lines are not in this excerpt — no comments
# are inserted inside it, since that would alter the printed text.
62 if ( !getopts("cdm:v:ijfq", \%opt ) ) {
64 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i|-j|-f]
67 -c create database on first use
68 -d delete database before import
69 -m num import just num increments for one host
70 -v num set verbosity (debug) level (default $debug)
71 -i update Hyper Estraier full text index
72 -j update full text, don't check existing files
73 -f don't do anything with full text index
74 -q be quiet for hosts without changes
76 Option -j is variation on -i. It will allow faster initial creation
77 of full-text index from existing database.
79 Option -f will create database which is out of sync with full text index. You
80 will have to re-run $0 with -i to fix it.
# Apply -v (debug level) and -f (disable full-text indexing by clearing the
# node URL; hest_update() later no-ops when the URL is unset).
87 print "Debug level at $opt{v}\n";
90 print "WARNING: disabling full-text index update. You need to re-run $0 -j !\n";
91 $index_node_url = undef;
# Fragments of two small time helpers; their `sub` header lines are not in
# this excerpt. The first formats a duration in seconds as "[Hh]MM:SS"
# (hours omitted when zero), the second returns "now" formatted with $t_fmt.
97 my $t = shift || return;
99 my ($ss,$mm,$hh) = gmtime($t);
100 $out .= "${hh}h" if ($hh);
101 $out .= sprintf("%02d:%02d", $mm,$ss);
# curr_time(): current local time rendered with the shared $t_fmt pattern.
106 return strftime($t_fmt,localtime());
# Body of hest_update($host_id, $share_id, $num): push newly imported file
# rows into the Hyper Estraier full-text index, in chunks of EST_CHUNK rows.
# (The `sub` header line is not in this excerpt.) With -j, the per-document
# existence check against the index is skipped for faster initial loads.
113 my ($host_id, $share_id, $num) = @_;
115 my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
# Bail out early when indexing is disabled (-f) or the URL is not http://.
117 unless ($index_node_url && $index_node_url =~ m#^http://#) {
118 print STDERR "HyperEstraier support not enabled or index node invalid\n" if ($debug);
123 print curr_time," updating Hyper Estraier:";
# Lazily open (and cache in $hest_node) a connection to the index node.
130 if ($index_node_url) {
131 print " opening index $index_node_url";
132 $hest_node ||= Search::Estraier::Node->new(
133 url => $index_node_url,
138 print " via node URL";
# When called with a specific host/share/backup, restrict the SELECT to it;
# otherwise (full reindex) the query runs unfiltered.
147 if (defined($host_id) && defined($share_id) && defined($num)) {
154 @data = ( $host_id, $share_id, $num );
# Chunked pagination: EST_CHUNK rows at a time, advancing $offset below.
157 my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
159 my $sth = $dbh->prepare(qq{
163 shares.name AS sname,
164 -- shares.share AS sharename,
165 files.backupnum AS backupnum,
166 -- files.name AS filename,
167 files.path AS filepath,
171 files.shareid AS shareid,
172 backups.date AS backup_date
174 INNER JOIN shares ON files.shareID=shares.ID
175 INNER JOIN hosts ON hosts.ID = shares.hostID
176 INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
181 $sth->execute(@data);
182 $results = $sth->rows;
185 print " - no new files\n";
# Nested fmt_date helper fragment: ISO timestamp via BackupPC::Lib.
192 my $t = shift || return;
193 my $iso = BackupPC::Lib::timeStamp($t);
198 while (my $row = $sth->fetchrow_hashref()) {
# Document URI: host:share#backupnum path — unique per indexed file.
200 my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
201 if (! $skip_check && $hest_node) {
202 my $id = $hest_node->uri_to_id($uri);
# NOTE(review): uri_to_id() returns -1 for an unknown URI, so
# `$id == -1` skips documents that are NOT yet indexed — this looks
# inverted (expected: skip when the URI already exists). Verify
# against Search::Estraier before changing.
203 next if ($id && $id == -1);
206 # create a document object
207 my $doc = Search::Estraier::Document->new;
209 # add attributes to the document object
210 $doc->add_attr('@uri', $uri);
# Copy every selected column as a document attribute (skip NULLs).
212 foreach my $c (@{ $sth->{NAME} }) {
213 print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
214 $doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
217 #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
219 # add the body text to the document object
220 my $path = $row->{'filepath'};
221 $doc->add_text($path);
# Hidden text with a space after every character so partial-path and
# per-character queries also match.
222 $path =~ s/(.)/$1 /g;
223 $doc->add_hidden_text($path);
225 print STDERR $doc->dump_draft,"\n" if ($debug > 1);
227 # register the document object to the database
228 $hest_node->put_doc($doc) if ($hest_node);
# Next chunk; loop ends when a short (non-full) chunk is returned.
235 $offset += EST_CHUNK;
237 } while ($results == EST_CHUNK);
# Guard against division by zero when the whole update took < 1 second.
239 my $dur = (time() - $t) || 1;
240 printf(" [%.2f/s dur: %s]\n",
# -i/-j without -c means: reindex existing database content into the
# full-text index (announce which flag triggered it).
250 if ( ( $opt{i} || $opt{j} ) && !$opt{c} ) {
252 print "force update of Hyper Estraier index ";
253 print "by -i flag" if ($opt{i});
254 print "by -j flag" if ($opt{j});
# Body of an index-creation helper (sub header not in this excerpt).
# Takes a spec "table:column[:unique]"; when the third field is present it
# is the literal SQL keyword (e.g. "unique") interpolated into the DDL.
# NOTE(review): values are interpolated directly into SQL — acceptable only
# because the specs are hard-coded in this script, never user input.
262 my $index = shift || return;
263 my ($table,$col,$unique) = split(/:/, $index);
266 print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
267 $dbh->do(qq{ create $unique index $index on $table($col) });
# Schema creation (-c): hosts, shares, dvds(?), backups, files, archive,
# archive_backup, archive_burned, backup_parts. The DDL lines below are the
# body of $dbh->do(qq{...}) calls whose delimiters fall outside this
# excerpt, so no comments are inserted between them.
270 print "creating tables...\n";
274 ID SERIAL PRIMARY KEY,
275 name VARCHAR(30) NOT NULL,
279 create table shares (
280 ID SERIAL PRIMARY KEY,
281 hostID INTEGER NOT NULL references hosts(id),
282 name VARCHAR(30) NOT NULL,
283 share VARCHAR(200) NOT NULL
287 ID SERIAL PRIMARY KEY,
288 num INTEGER NOT NULL,
289 name VARCHAR(255) NOT NULL,
293 create table backups (
295 hostID INTEGER NOT NULL references hosts(id),
296 num INTEGER NOT NULL,
297 date integer NOT NULL,
298 type CHAR(4) not null,
299 shareID integer not null references shares(id),
300 size bigint not null,
301 inc_size bigint not null default -1,
302 inc_deleted boolean default false,
303 parts integer not null default 0,
309 shareID INTEGER NOT NULL references shares(id),
310 backupNum INTEGER NOT NULL,
311 name VARCHAR(255) NOT NULL,
312 path VARCHAR(255) NOT NULL,
313 date integer NOT NULL,
314 type INTEGER NOT NULL,
315 size bigint NOT NULL,
319 create table archive (
322 total_size bigint default -1,
324 username varchar(20) not null,
325 date timestamp default now(),
329 create table archive_backup (
330 archive_id int not null references archive(id) on delete cascade,
331 backup_id int not null references backups(id),
332 primary key(archive_id, backup_id)
335 create table archive_burned (
336 archive_id int references archive(id),
337 date timestamp default now(),
338 part int not null default 1,
339 copy int not null default 1,
340 iso_size bigint default -1
343 create table backup_parts (
345 backup_id int references backups(id),
346 part_nr int not null check (part_nr > 0),
347 tar_size bigint not null check (tar_size > 0),
348 size bigint not null check (size > 0),
350 items int not null check (items > 0),
351 date timestamp default now(),
# Indexes are built from "table:column[:unique]" specs via the helper above;
# the qw( ) list below is truncated in this excerpt.
356 print "creating indexes: ";
358 foreach my $index (qw(
371 archive_burned:archive_id
372 backup_parts:backup_id,part_nr:unique
# A standalone sequence (dvd_nr) used for DVD numbering.
377 print " creating sequence: ";
378 foreach my $seq (qw/dvd_nr/) {
380 $dbh->do( qq{ CREATE SEQUENCE $seq } );
# Consistency triggers, sent as one heredoc of PL/pgSQL (doubled '' quoting
# because the function bodies are themselves quoted strings). Everything
# after the heredoc opener is string content — no comments inserted below.
# backup_parts_check: on backups insert/update, assert backups.parts equals
# the actual count of backup_parts rows. backup_backup_parts_check: keep
# backups.parts in sync as backup_parts rows are inserted/deleted.
383 print " creating triggers ";
384 $dbh->do( <<__END_OF_TRIGGER__ );
386 create or replace function backup_parts_check() returns trigger as '
392 -- raise notice ''old/new parts %/% backup_id %/%'', old.parts, new.parts, old.id, new.id;
393 if (TG_OP=''UPDATE'') then
395 b_parts := new.parts;
396 elsif (TG_OP = ''INSERT'') then
398 b_parts := new.parts;
400 b_counted := (select count(*) from backup_parts where backup_id = b_id);
401 -- raise notice ''backup % parts %'', b_id, b_parts;
402 if ( b_parts != b_counted ) then
403 raise exception ''Update of backup % aborted, requested % parts and there are really % parts'', b_id, b_parts, b_counted;
409 create trigger do_backup_parts_check
410 after insert or update or delete on backups
411 for each row execute procedure backup_parts_check();
413 create or replace function backup_backup_parts_check() returns trigger as '
419 if (TG_OP = ''INSERT'') then
420 -- raise notice ''trigger: % backup_id %'', TG_OP, new.backup_id;
421 b_id = new.backup_id;
422 my_part_nr = new.part_nr;
423 execute ''update backups set parts = parts + 1 where id = '' || b_id;
424 elsif (TG_OP = ''DELETE'') then
425 -- raise notice ''trigger: % backup_id %'', TG_OP, old.backup_id;
426 b_id = old.backup_id;
427 my_part_nr = old.part_nr;
428 execute ''update backups set parts = parts - 1 where id = '' || b_id;
430 calc_part := (select count(part_nr) from backup_parts where backup_id = b_id);
431 if ( my_part_nr != calc_part ) then
432 raise exception ''Update of backup_parts with backup_id % aborted, requested part_nr is % and calulated next is %'', b_id, my_part_nr, calc_part;
438 create trigger do_backup_backup_parts_check
439 after insert or update or delete on backup_parts
440 for each row execute procedure backup_backup_parts_check();
# With -d: wipe existing data child-tables-first so FK references to
# hosts/shares/backups are removed before their parents.
450 ## delete data before inserting ##
453 foreach my $table (qw(files dvds backups shares hosts)) {
455 $dbh->do(qq{ DELETE FROM $table });
462 ## insert new values ##
# Read the BackupPC hosts file, then pre-prepare every statement the import
# loop needs, cached in the shared %$sth hash. Each qq{...} below continues
# past the lines shown (closers fall outside this excerpt).
465 $hosts = $bpc->HostInfoRead();
471 $sth->{insert_hosts} = $dbh->prepare(qq{
472 INSERT INTO hosts (name, IP) VALUES (?,?)
475 $sth->{hosts_by_name} = $dbh->prepare(qq{
476 SELECT ID FROM hosts WHERE name=?
479 $sth->{backups_count} = $dbh->prepare(qq{
482 WHERE hostID=? AND num=? AND shareid=?
485 $sth->{insert_backups} = $dbh->prepare(qq{
486 INSERT INTO backups (hostID, num, date, type, shareid, size)
487 VALUES (?,?,?,?,?,-1)
490 $sth->{update_backups_size} = $dbh->prepare(qq{
491 UPDATE backups SET size = ?
492 WHERE hostID = ? and num = ? and date = ? and type =? and shareid = ?
495 $sth->{insert_files} = $dbh->prepare(qq{
497 (shareID, backupNum, name, path, date, type, size)
498 VALUES (?,?,?,?,?,?,?)
# Main import loop: for every configured host, find-or-create its hosts
# row, then walk its backups and shares, inserting any backup not yet in
# the database and recursing over its file tree.
501 my @hosts = keys %{$hosts};
504 foreach my $host_key (@hosts) {
506 my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
# Find-or-create the host row; on insert, fetch the generated ID back.
508 $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
510 unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
511 $sth->{insert_hosts}->execute(
512 $hosts->{$host_key}->{'host'},
513 $hosts->{$host_key}->{'ip'}
516 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
520 # get backups for a host
521 my @backups = $bpc->BackupInfoRead($hostname);
# NOTE(review): leftover debug output — dumps every host's full backup
# list to STDERR unconditionally; should be removed or gated on $debug.
522 warn "XXXX ",dump(@backups);
523 my $incs = scalar @backups;
# Host/share headers are printed lazily under -q (only when there is
# actually something new for this host).
525 my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
526 $hosts->{$host_key}->{'host'},
531 print $host_header unless ($opt{q});
536 foreach my $backup (@backups) {
# -m limits how many increments per host get imported.
539 last if ($opt{m} && $inc_nr > $opt{m});
541 my $backupNum = $backup->{'num'};
542 my @backupShares = ();
544 my $share_header = sprintf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
545 $hosts->{$host_key}->{'host'},
546 $inc_nr, $incs, $backupNum,
547 $backup->{type} || '?',
548 $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
549 strftime($t_fmt,localtime($backup->{startTime})),
550 fmt_time($backup->{endTime} - $backup->{startTime})
552 print $share_header unless ($opt{q});
# View over this host's backups; only_first presumably restricts file
# listing to first occurrence — TODO confirm against BackupPC::View.
554 my $files = BackupPC::View->new($bpc, $hostname, \@backups, { only_first => 1 });
556 foreach my $share ($files->shareList($backupNum)) {
560 $shareID = getShareID($share, $hostID, $hostname);
562 $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
563 my ($count) = $sth->{backups_count}->fetchrow_array();
564 # skip if already in database!
565 next if ($count > 0);
567 # dump host and share header for -q
571 $host_header = undef;
577 print curr_time," ", $share;
# Insert the backup with size -1, walk the tree to count/insert files,
# then write the real size back via update_backups_size.
579 $sth->{insert_backups}->execute(
582 $backup->{'endTime'},
583 substr($backup->{'type'},0,4),
587 my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
590 $sth->{update_backups_size}->execute(
594 $backup->{'endTime'},
595 substr($backup->{'type'},0,4),
# Guard against division by zero for sub-second share imports.
606 my $dur = (time() - $t) || 1;
607 printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
609 ($size / 1024 / 1024),
# Push this share's new files into the full-text index only when
# something was actually added.
614 hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
623 print "total duration: ",fmt_time(time() - $start_t),"\n";
# Body of getShareID($share, $hostID, $hostname): find-or-create the shares
# row for this host+share and return its ID. (The `sub` header line is not
# in this excerpt.) Statements are prepared once and cached in %$sth.
629 my ($share, $hostID, $hostname) = @_;
631 $sth->{share_id} ||= $dbh->prepare(qq{
632 SELECT ID FROM shares WHERE hostID=? AND name=?
635 $sth->{share_id}->execute($hostID,$share);
637 my ($id) = $sth->{share_id}->fetchrow_array();
639 return $id if (defined($id));
641 $sth->{insert_share} ||= $dbh->prepare(qq{
# Display label "hostname/share" with runs of slashes collapsed to one.
647 my $drop_down = $hostname . '/' . $share;
648 $drop_down =~ s#//+#/#g;
650 $sth->{insert_share}->execute($hostID,$share, $drop_down);
651 return $dbh->last_insert_id(undef,undef,'shares',undef);
# Body of found_in_db($key, @data): decide whether a file row already
# exists (matching share, path, size and date +/- the DST offset) and
# insert it when it does not. Results are memoized in %$beenThere by $key.
# (The `sub` header line is not in this excerpt.)
659 my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
661 return $beenThere->{$key} if (defined($beenThere->{$key}));
663 $sth->{file_in_db} ||= $dbh->prepare(qq{
665 WHERE shareID = ? and
# Three date candidates tolerate a +/- 1h daylight-saving shift.
668 ( date = ? or date = ? or date = ? )
672 my @param = ($shareID,$path,$size,$date, $date-$dst_offset, $date+$dst_offset);
673 $sth->{file_in_db}->execute(@param);
674 my $rows = $sth->{file_in_db}->rows;
675 print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
677 $beenThere->{$key}++;
# Only insert when no matching row was found.
679 $sth->{'insert_files'}->execute(@data) unless ($rows);
683 ####################################################
684 # recursing through filesystem structure, #
685 # returning a flattened files list #
686 ####################################################
# Walk one directory of a backup share, count total/new files and dirs and
# accumulated size, then recurse (via an explicit stack + recursive calls)
# into subdirectories. Returns ($nr_files, $new_files, $nr_dirs, $new_dirs,
# $size).
# NOTE(review): the prototype declares 8 scalar slots but only 7 args are
# unpacked; Perl prototypes on named subs are discouraged anyway — verify
# before removing.
687 sub recurseDir($$$$$$$$) {
689 my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
691 print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
693 my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
698 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
699 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
701 # first, add all the entries in current directory
702 foreach my $path_key (keys %{$filesInBackup}) {
703 print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
# @data for found_in_db: relPath, mtime, type, size of this entry
# (surrounding assignment lines fall outside this excerpt).
708 $filesInBackup->{$path_key}->{'relPath'},
709 $filesInBackup->{$path_key}->{'mtime'},
710 $filesInBackup->{$path_key}->{'type'},
711 $filesInBackup->{$path_key}->{'size'}
# Three dedup keys: exact mtime plus the same mtime shifted one hour
# either way, so a DST change does not re-import identical files.
714 my $key = join(" ", (
718 $filesInBackup->{$path_key}->{'mtime'},
719 $filesInBackup->{$path_key}->{'size'}
722 my $key_dst_prev = join(" ", (
726 $filesInBackup->{$path_key}->{'mtime'} - $dst_offset,
727 $filesInBackup->{$path_key}->{'size'}
730 my $key_dst_next = join(" ", (
734 $filesInBackup->{$path_key}->{'mtime'} + $dst_offset,
735 $filesInBackup->{$path_key}->{'size'}
# Only consult the database when none of the three keys was seen yet.
740 ! defined($beenThere->{$key}) &&
741 ! defined($beenThere->{$key_dst_prev}) &&
742 ! defined($beenThere->{$key_dst_next}) &&
743 ! ($found = found_in_db($key, @data))
745 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
# Count the entry as a new dir or new file unless it already existed.
747 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
748 $new_dirs++ unless ($found);
749 print STDERR " dir\n" if ($debug >= 2);
751 $new_files++ unless ($found);
752 print STDERR " file\n" if ($debug >= 2);
754 $size += $filesInBackup->{$path_key}->{'size'} || 0;
# Subdirectories are deferred onto @stack and recursed below, rather
# than recursing immediately inside this loop.
757 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
760 my $full_path = $dir . '/' . $path_key;
761 push @stack, $full_path;
762 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
764 # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
776 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
# Drain the stack: recurse into each collected subdirectory and fold its
# counters into this level's totals (accumulation lines not shown here).
778 while ( my $dir = shift @stack ) {
779 my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
780 print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
789 return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);