#!/usr/local/bin/perl -w use strict; use lib "__INSTALLDIR__/lib"; use DBI; use BackupPC::Lib; use BackupPC::View; use Data::Dumper; use Getopt::Std; use Time::HiRes qw/time/; use File::Pid; use POSIX qw/strftime/; use BackupPC::SearchLib; use constant BPC_FTYPE_DIR => 5; use constant EST_CHUNK => 100000; my $debug = 0; $|=1; my $start_t = time(); my $pidfile = new File::Pid; if (my $pid = $pidfile->running ) { die "$0 already running: $pid\n"; } elsif ($pidfile->pid ne $$) { $pidfile->remove; $pidfile = new File::Pid; } $pidfile->write; print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n"; my $t_fmt = '%Y-%m-%d %H:%M:%S'; my $hosts; my $bpc = BackupPC::Lib->new || die; my %Conf = $bpc->Conf(); my $TopDir = $bpc->TopDir(); my $beenThere = {}; my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n"; my $user = $Conf{SearchUser} || ''; my $use_hest = $Conf{HyperEstraierIndex}; my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest); my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 }); my %opt; if ( !getopts("cdm:v:ij", \%opt ) ) { print STDERR <sync(); $hest_db->close(); } exit(0); } $SIG{'INT'} = \&signal; $SIG{'QUIT'} = \&signal; sub hest_update { my ($host_id, $share_id, $num) = @_; my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n"; unless ($use_hest) { print STDERR "HyperEstraier support not enabled in configuration\n"; return; } print curr_time," updating HyperEstraier:"; my $t = time(); my $offset = 0; my $added = 0; print " opening index $use_hest"; if ($index_path) { $hest_db = HyperEstraier::Database->new(); $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT); print " directly"; } elsif ($index_node_url) { $hest_node ||= HyperEstraier::Node->new($index_node_url); $hest_node->set_auth('admin', 'admin'); print " via node URL"; } else { die "don't know how to use HyperEstraier Index $use_hest"; } print " increment is " . EST_CHUNK . " files:"; my $results = 0; do { my $where = ''; my @data; if (defined($host_id) && defined($share_id) && defined($num)) { $where = qq{ WHERE hosts.id = ? AND shares.id = ? AND files.backupnum = ? }; @data = ( $host_id, $share_id, $num ); } my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset); my $sth = $dbh->prepare(qq{ SELECT files.id AS fid, hosts.name AS hname, shares.name AS sname, -- shares.share AS sharename, files.backupnum AS backupnum, -- files.name AS filename, files.path AS filepath, files.date AS date, files.type AS type, files.size AS size, files.shareid AS shareid, backups.date AS backup_date FROM files INNER JOIN shares ON files.shareID=shares.ID INNER JOIN hosts ON hosts.ID = shares.hostID INNER JOIN backups ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID $where $limit }); $sth->execute(@data); $results = $sth->rows; if ($results == 0) { print " - no new files\n"; last; } sub fmt_date { my $t = shift || return; my $iso = BackupPC::Lib::timeStamp($t); $iso =~ s/\s/T/; return $iso; } while (my $row = $sth->fetchrow_hashref()) { my $fid = $row->{'fid'} || die "no fid?"; my $uri = 'file:///' . $fid; unless ($skip_check) { my $id = ($hest_db || $hest_node)->uri_to_id($uri); next unless ($id == -1); } # create a document object my $doc = HyperEstraier::Document->new; # add attributes to the document object $doc->add_attr('@uri', $uri); foreach my $c (@{ $sth->{NAME} }) { $doc->add_attr($c, $row->{$c}) if ($row->{$c}); } #$doc->add_attr('@cdate', fmt_date($row->{'date'})); # add the body text to the document object my $path = $row->{'filepath'}; $doc->add_text($path); $path =~ s/(.)/$1 /g; $doc->add_hidden_text($path); print STDERR $doc->dump_draft,"\n" if ($debug > 1); # register the document object to the database if ($hest_db) { $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN); } elsif ($hest_node) { $hest_node->put_doc($doc); } else { die "not supported"; } $added++; } print " $added"; $hest_db->sync() if ($index_path); $offset += EST_CHUNK; } while ($results == EST_CHUNK); if ($index_path) { print ", close"; $hest_db->close(); } my $dur = (time() - $t) || 1; printf(" [%.2f/s dur: %s]\n", ( $added / $dur ), fmt_time($dur) ); } #---- /subs ---- ## update index ## if (($opt{i} || $opt{j} || ($index_path && ! -e $TopDir . $index_path)) && !$opt{c}) { # update all print "force update of HyperEstraier index "; print "importing existing data" unless (-e $TopDir . $index_path); print "by -i flag" if ($opt{i}); print "by -j flag" if ($opt{j}); print "\n"; hest_update(); } ## create tables ## if ($opt{c}) { sub do_index { my $index = shift || return; my ($table,$col,$unique) = split(/:/, $index); $unique ||= ''; $index =~ s/\W+/_/g; print "$index on $table($col)" . ( $unique ? "u" : "" ) . " "; $dbh->do(qq{ create $unique index $index on $table($col) }); } print "creating tables...\n"; $dbh->do( qq{ create table hosts ( ID SERIAL PRIMARY KEY, name VARCHAR(30) NOT NULL, IP VARCHAR(15) ); create table shares ( ID SERIAL PRIMARY KEY, hostID INTEGER NOT NULL references hosts(id), name VARCHAR(30) NOT NULL, share VARCHAR(200) NOT NULL ); create table dvds ( ID SERIAL PRIMARY KEY, num INTEGER NOT NULL, name VARCHAR(255) NOT NULL, mjesto VARCHAR(255) ); create table backups ( id serial, hostID INTEGER NOT NULL references hosts(id), num INTEGER NOT NULL, date integer NOT NULL, type CHAR(4) not null, shareID integer not null references shares(id), size bigint not null, inc_size bigint not null default -1, inc_deleted boolean default false, parts integer not null default 1, PRIMARY KEY(id) ); create table files ( ID SERIAL, shareID INTEGER NOT NULL references shares(id), backupNum INTEGER NOT NULL, name VARCHAR(255) NOT NULL, path VARCHAR(255) NOT NULL, date integer NOT NULL, type INTEGER NOT NULL, size bigint NOT NULL, primary key(id) ); create table archive ( id serial, dvd_nr int not null, total_size bigint default -1, note text, username varchar(20) not null, date timestamp default now(), primary key(id) ); create table archive_backup ( archive_id int not null references archive(id) on delete cascade, backup_id int not null references backups(id), primary key(archive_id, backup_id) ); create table archive_burned ( archive_id int references archive(id), date timestamp default now(), part int not null default 1, copy int not null default 1, iso_size bigint default -1 ); create table backup_parts ( id serial, backup_id int references backups(id), part_nr int not null check (part_nr > 0), tar_size bigint not null check (tar_size > 0), size bigint not null check (size > 0), md5 text not null, items int not null check (items > 0), date timestamp default now(), primary key(id) ); }); print "creating indexes: "; foreach my $index (qw( hosts:name backups:hostID backups:num backups:shareID shares:hostID shares:name files:shareID files:path files:name files:date files:size archive:dvd_nr archive_burned:archive_id backup_parts:backup_id,part_nr )) { do_index($index); } print " creating sequence: "; foreach my $seq (qw/dvd_nr/) { print "$seq "; $dbh->do( qq{ CREATE SEQUENCE $seq } ); } print "...\n"; $dbh->commit; } ## delete data before inseting ## if ($opt{d}) { print "deleting "; foreach my $table (qw(files dvds backups shares hosts)) { print "$table "; $dbh->do(qq{ DELETE FROM $table }); } print " done...\n"; $dbh->commit; } ## insert new values ## # get hosts $hosts = $bpc->HostInfoRead(); my $hostID; my $shareID; my $sth; $sth->{insert_hosts} = $dbh->prepare(qq{ INSERT INTO hosts (name, IP) VALUES (?,?) }); $sth->{hosts_by_name} = $dbh->prepare(qq{ SELECT ID FROM hosts WHERE name=? }); $sth->{backups_count} = $dbh->prepare(qq{ SELECT COUNT(*) FROM backups WHERE hostID=? AND num=? AND shareid=? }); $sth->{insert_backups} = $dbh->prepare(qq{ INSERT INTO backups (hostID, num, date, type, shareid, size) VALUES (?,?,?,?,?,?) }); $sth->{insert_files} = $dbh->prepare(qq{ INSERT INTO files (shareID, backupNum, name, path, date, type, size) VALUES (?,?,?,?,?,?,?) }); foreach my $host_key (keys %{$hosts}) { my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key"; $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'}); unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) { $sth->{insert_hosts}->execute( $hosts->{$host_key}->{'host'}, $hosts->{$host_key}->{'ip'} ); $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef); } print "host ".$hosts->{$host_key}->{'host'}.": "; # get backups for a host my @backups = $bpc->BackupInfoRead($hostname); my $incs = scalar @backups; print "$incs increments\n"; my $inc_nr = 0; $beenThere = {}; foreach my $backup (@backups) { $inc_nr++; last if ($opt{m} && $inc_nr > $opt{m}); my $backupNum = $backup->{'num'}; my @backupShares = (); printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n", $hosts->{$host_key}->{'host'}, $inc_nr, $incs, $backupNum, $backup->{type} || '?', $backup->{nFilesNew} || '?', $backup->{nFiles} || '?', strftime($t_fmt,localtime($backup->{startTime})), fmt_time($backup->{endTime} - $backup->{startTime}) ); my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1); foreach my $share ($files->shareList($backupNum)) { my $t = time(); $shareID = getShareID($share, $hostID, $hostname); $sth->{backups_count}->execute($hostID, $backupNum, $shareID); my ($count) = $sth->{backups_count}->fetchrow_array(); # skip if allready in database! next if ($count > 0); # dump some log print curr_time," ", $share; my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID); $sth->{insert_backups}->execute( $hostID, $backupNum, $backup->{'endTime'}, substr($backup->{'type'},0,4), $shareID, $size, ); print " commit"; $dbh->commit(); my $dur = (time() - $t) || 1; printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n", $nf, $f, $nd, $d, ($size / 1024 / 1024), ( ($f+$d) / $dur ), fmt_time($dur) ); hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0); } } } undef $sth; $dbh->commit(); $dbh->disconnect(); print "total duration: ",fmt_time(time() - $start_t),"\n"; $pidfile->remove; sub getShareID() { my ($share, $hostID, $hostname) = @_; $sth->{share_id} ||= $dbh->prepare(qq{ SELECT ID FROM shares WHERE hostID=? AND name=? }); $sth->{share_id}->execute($hostID,$share); my ($id) = $sth->{share_id}->fetchrow_array(); return $id if (defined($id)); $sth->{insert_share} ||= $dbh->prepare(qq{ INSERT INTO shares (hostID,name,share) VALUES (?,?,?) }); my $drop_down = $hostname . '/' . $share; $drop_down =~ s#//+#/#g; $sth->{insert_share}->execute($hostID,$share, $drop_down); return $dbh->last_insert_id(undef,undef,'shares',undef); } sub found_in_db { my @data = @_; shift @data; my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_; return $beenThere->{$key} if (defined($beenThere->{$key})); $sth->{file_in_db} ||= $dbh->prepare(qq{ SELECT 1 FROM files WHERE shareID = ? and path = ? and date = ? and size = ? LIMIT 1 }); my @param = ($shareID,$path,$date,$size); $sth->{file_in_db}->execute(@param); my $rows = $sth->{file_in_db}->rows; print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3); $beenThere->{$key}++; $sth->{'insert_files'}->execute(@data) unless ($rows); return $rows; } #################################################### # recursing through filesystem structure and # # and returning flattened files list # #################################################### sub recurseDir($$$$$$$$) { my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_; print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1); my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0); { # scope my @stack; print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2); my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir); # first, add all the entries in current directory foreach my $path_key (keys %{$filesInBackup}) { print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3); my @data = ( $shareID, $backupNum, $path_key, $filesInBackup->{$path_key}->{'relPath'}, $filesInBackup->{$path_key}->{'mtime'}, $filesInBackup->{$path_key}->{'type'}, $filesInBackup->{$path_key}->{'size'} ); my $key = join(" ", ( $shareID, $dir, $path_key, $filesInBackup->{$path_key}->{'mtime'}, $filesInBackup->{$path_key}->{'size'} )); my $found; if (! defined($beenThere->{$key}) && ! ($found = found_in_db($key, @data)) ) { print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2); if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) { $new_dirs++ unless ($found); print STDERR " dir\n" if ($debug >= 2); } else { $new_files++ unless ($found); print STDERR " file\n" if ($debug >= 2); } $size += $filesInBackup->{$path_key}->{'size'} || 0; } if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) { $nr_dirs++; my $full_path = $dir . '/' . $path_key; push @stack, $full_path; print STDERR "### store to stack: $full_path\n" if ($debug >= 3); # my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key}); # # $nr_files += $f; # $new_files += $nf; # $nr_dirs += $d; # $new_dirs += $nd; } else { $nr_files++; } } print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2); while ( my $dir = shift @stack ) { my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID); print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1); $nr_files += $f; $new_files += $nf; $nr_dirs += $d; $new_dirs += $nd; $size += $s; } } return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size); }