#!/usr/local/bin/perl -w
#
# Import BackupPC backup catalogues into a SQL search database and
# (optionally) a Hyper Estraier full-text index.
#
# Options:
#   -c      create database tables on first use
#   -d      delete existing data before import
#   -m num  import just num increments for each host
#   -v num  set verbosity (debug) level
#   -i      force update of the Hyper Estraier full-text index
#
use strict;
use lib "__INSTALLDIR__/lib";

use DBI;
use BackupPC::Lib;
use BackupPC::View;
use Data::Dumper;
use Getopt::Std;
use Time::HiRes qw/time/;
use File::Pid;
use POSIX qw/strftime/;

use constant BPC_FTYPE_DIR => 5;        # BackupPC file type number for directories
use constant EST_CHUNK     => 100000;   # rows per Hyper Estraier indexing batch

my $debug = 0;
$| = 1;    # unbuffered output so progress is visible immediately

my $start_t = time();

# Allow only one running instance of this importer at a time.
my $pidfile = File::Pid->new;
if ( my $pid = $pidfile->running ) {
	die "$0 already running: $pid\n";
}
elsif ( $pidfile->pid ne $$ ) {
	# stale pid file left behind by a dead process -- replace it with ours
	$pidfile->remove;
	$pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ", $pidfile->pid, " file ", $pidfile->file, "\n";

my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf   = $bpc->Conf();
my $TopDir = $bpc->TopDir();
my $beenThere = {};    # cache of file keys already seen; reset per host

my $dsn  = $Conf{SearchDSN}  || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# Full-text indexing is optional: resolve the index path and load the
# Hyper Estraier bindings only when it is actually configured.
# (The original did `use HyperEstraier;` inside the if(), which loads the
# module at compile time unconditionally -- `require` defers it correctly.)
my $index_path = $Conf{HyperEstraierIndex};
if ($index_path) {
	$index_path = $TopDir . '/' . $index_path;
	$index_path =~ s#//#/#g;
	require HyperEstraier;
}

my $dbh = DBI->connect( $dsn, $user, "", { RaiseError => 1, AutoCommit => 0 } );

my %opt;
if ( !getopts( "cdm:v:i", \%opt ) ) {
	# NOTE(review): the usage text below and several subs further down were
	# garbled/elided in the source file; reconstructed from context --
	# confirm against the upstream version of this script.
	print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v num] [-i]

Options:
	-c	create database on first use
	-d	delete database before import
	-m num	import just num increments for one host
	-v num	set verbosity (debug) level (default $debug)
	-i	update Hyper Estraier full text index
EOF
	exit 1;
}

# NOTE(review): reconstructed -v handling; $debug is referenced throughout.
$debug = $opt{v} if ( $opt{v} );

#---- subs ----

# Format a duration in seconds as [Hh]MM:SS for log output.
# NOTE(review): fmt_time was in the elided region; reconstructed -- confirm.
sub fmt_time {
	my $t = shift || return;
	my $out = "";
	my ( $ss, $mm, $hh ) = gmtime($t);
	$out .= "${hh}h" if ($hh);
	$out .= sprintf( "%02d:%02d", $mm, $ss );
	return $out;
}

# Current wall-clock time formatted with $t_fmt, used as a log prefix.
# NOTE(review): curr_time was in the elided region; reconstructed -- confirm.
sub curr_time {
	return strftime( $t_fmt, localtime() );
}

# Render an epoch timestamp as ISO 8601 (space -> 'T') for index attributes.
sub fmt_date {
	my $t = shift || return;
	my $iso = BackupPC::Lib::timeStamp($t);
	$iso =~ s/\s/T/;
	return $iso;
}

my $hest_db;    # open Hyper Estraier handle, shared with the signal handler

# On INT/QUIT, flush and close the full-text index before exiting so it is
# not left in a corrupted state.
# NOTE(review): the head of this sub was elided in the source; only its tail
# (sync/close/exit) survived -- reconstructed, confirm against upstream.
sub signal {
	my ($sig) = @_;
	if ($hest_db) {
		print "\nCaught a SIG$sig--syncing database and shutting down\n";
		$hest_db->sync();
		$hest_db->close();
	}
	exit(0);
}

$SIG{'INT'}  = \&signal;
$SIG{'QUIT'} = \&signal;

# Push file metadata from the SQL database into the Hyper Estraier index in
# batches of EST_CHUNK rows.  With no arguments, everything not yet indexed
# is added; with ($host_id, $share_id, $num) only that single backup is.
sub hest_update {

	my ( $host_id, $share_id, $num ) = @_;

	# Bug fix: never try to open an index at an empty/unconfigured path.
	return unless ($index_path);

	print curr_time, " updating HyperEstraier:";

	my $t = time();

	my $offset = 0;
	my $added  = 0;

	print " opening index $index_path";
	$hest_db = HyperEstraier::Database->new();
	$hest_db->open( $index_path,
		$HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT );

	print " increment is " . EST_CHUNK . " files:";

	my $results = 0;

	# Bug fix: the original used do{...}while with `last` inside -- a do
	# block is not a loop in Perl, so `last` there is a runtime error.
	while (1) {
		my $where = '';
		my @data;
		if ( $host_id && $share_id && $num ) {
			$where = qq{
			WHERE
				hosts.id = ? AND
				shares.id = ? AND
				files.backupnum = ?
			};
			@data = ( $host_id, $share_id, $num );
		}

		my $limit = sprintf( 'LIMIT ' . EST_CHUNK . ' OFFSET %d', $offset );

		my $sth = $dbh->prepare(qq{
			SELECT
				files.id		AS fid,
				hosts.name		AS hname,
				shares.name		AS sname,
				-- shares.share	AS sharename,
				files.backupnum	AS backupnum,
				-- files.name	AS filename,
				files.path		AS filepath,
				files.date		AS date,
				files.type		AS type,
				files.size		AS size,
				files.shareid	AS shareid,
				backups.date	AS backup_date
			FROM files
				INNER JOIN shares	ON files.shareID=shares.ID
				INNER JOIN hosts	ON hosts.ID = shares.hostID
				INNER JOIN backups	ON backups.num = files.backupNum
					and backups.hostID = hosts.ID AND backups.shareID = shares.ID
			$where
			$limit
		});

		$sth->execute(@data);
		$results = $sth->rows;

		if ( $results == 0 ) {
			print " - no new files\n";
			last;
		}

		while ( my $row = $sth->fetchrow_hashref() ) {

			my $fid = $row->{'fid'} || die "no fid?";
			my $uri = 'file:///' . $fid;

			# skip documents that are already in the index
			my $id = $hest_db->uri_to_id($uri);
			next unless ( $id == -1 );

			# create a document object
			my $doc = HyperEstraier::Document->new;

			# add attributes to the document object
			$doc->add_attr( '@uri', $uri );

			foreach my $c ( @{ $sth->{NAME} } ) {
				$doc->add_attr( $c, $row->{$c} ) if ( $row->{$c} );
			}

			#$doc->add_attr('@cdate', fmt_date($row->{'date'}));

			# add the body text to the document object
			my $path = $row->{'filepath'};
			$doc->add_text($path);

			# also index the path with every character space-separated, so
			# substring searches can match it
			$path =~ s/(.)/$1 /g;
			$doc->add_hidden_text($path);

			print STDERR $doc->dump_draft, "\n" if ( $debug > 1 );

			# register the document object to the database
			$hest_db->put_doc( $doc, $HyperEstraier::Database::PDCLEAN );

			$added++;
		}

		print " $added";
		$hest_db->sync();

		$offset += EST_CHUNK;

		last unless ( $results == EST_CHUNK );
	}

	print ", close";
	$hest_db->close();

	my $dur = ( time() - $t ) || 1;

	printf( " [%.2f/s dur: %s]\n", ( $added / $dur ), fmt_time($dur) );
}

#---- /subs ----

## update index ##
if ( ( $opt{i} || ( $index_path && !-e $index_path ) ) && !$opt{c} ) {
	# forced by -i, or the configured index does not exist yet
	print "force update of HyperEstraier index ";
	print "importing existing data" unless ( -e $index_path );
	print "by -i flag"              if ( $opt{i} );
	print "\n";
	hest_update();
}

## create tables ##
if ( $opt{c} ) {

	# Create a (possibly unique) index from a spec string of the form
	# table_column[,column...][_unique].
	sub do_index {
		my $index = shift || return;
		my ( $table, $col, $unique ) = split( /_/, $index );
		$unique ||= '';
		$index =~ s/,/_/g;
		$dbh->do(qq{ create $unique index $index on $table($col) });
	}

	print "creating tables...\n";

	$dbh->do(qq{
		create table hosts (
			ID      SERIAL      PRIMARY KEY,
			name    VARCHAR(30) NOT NULL,
			IP      VARCHAR(15)
		);
	});

	$dbh->do(qq{
		create table shares (
			ID          SERIAL          PRIMARY KEY,
			hostID      INTEGER         NOT NULL references hosts(id),
			name        VARCHAR(30)     NOT NULL,
			share       VARCHAR(200)    NOT NULL,
			localpath   VARCHAR(200)
		);
	});

	$dbh->do(qq{
		create table backups (
			hostID  INTEGER NOT NULL references hosts(id),
			num     INTEGER NOT NULL,
			date    integer NOT NULL,
			type    CHAR(4) not null,
			shareID integer not null references shares(id),
			size    integer not null,
			PRIMARY KEY(hostID, num, shareID)
		);
	});

	#do_index('backups_hostid,num_unique');

	$dbh->do(qq{
		create table dvds (
			ID      SERIAL          PRIMARY KEY,
			num     INTEGER         NOT NULL,
			name    VARCHAR(255)    NOT NULL,
			mjesto  VARCHAR(255)
		);
	});

	$dbh->do(qq{
		create table files (
			ID          SERIAL          PRIMARY KEY,
			shareID     INTEGER         NOT NULL references shares(id),
			backupNum   INTEGER         NOT NULL,
			name        VARCHAR(255)    NOT NULL,
			path        VARCHAR(255)    NOT NULL,
			date        integer         NOT NULL,
			type        INTEGER         NOT NULL,
			size        INTEGER         NOT NULL,
			dvdid       INTEGER         references dvds(id)
		);
	});

	print "creating indexes:";

	foreach my $index (qw(
		hosts_name
		backups_hostID
		backups_num
		shares_hostID
		shares_name
		files_shareID
		files_path
		files_name
		files_date
		files_size
	)) {
		print " $index";
		do_index($index);
	}
	print "...\n";

	$dbh->commit;
}

## delete data before inseting ##
if ( $opt{d} ) {
	print "deleting ";
	# child tables first so foreign-key constraints are not violated
	foreach my $table (qw(files dvds backups shares hosts)) {
		print "$table ";
		$dbh->do(qq{ DELETE FROM $table });
	}
	print " done...\n";

	$dbh->commit;
}

## insert new values ##

# get hosts
$hosts = $bpc->HostInfoRead();
my $hostID;
my $shareID;

# cache of prepared statement handles, keyed by purpose
my $sth = {};

$sth->{insert_hosts} = $dbh->prepare(qq{
	INSERT INTO hosts (name, IP) VALUES (?,?)
});

$sth->{hosts_by_name} = $dbh->prepare(qq{
	SELECT ID FROM hosts WHERE name=?
});

$sth->{backups_count} = $dbh->prepare(qq{
	SELECT COUNT(*)
	FROM backups
	WHERE hostID=? AND num=? AND shareid=?
});

$sth->{insert_backups} = $dbh->prepare(qq{
	INSERT INTO backups (hostID, num, date, type, shareid, size)
	VALUES (?,?,?,?,?,?)
});

$sth->{insert_files} = $dbh->prepare(qq{
	INSERT INTO files
		(shareID, backupNum, name, path, date, type, size)
	VALUES (?,?,?,?,?,?,?)
});

foreach my $host_key ( keys %{$hosts} ) {

	my $hostname = $hosts->{$host_key}->{'host'}
		|| die "can't find host for $host_key";

	# look the host up in the database, inserting it on first sight
	$sth->{hosts_by_name}->execute( $hosts->{$host_key}->{'host'} );

	unless ( ($hostID) = $sth->{hosts_by_name}->fetchrow_array() ) {
		$sth->{insert_hosts}->execute(
			$hosts->{$host_key}->{'host'},
			$hosts->{$host_key}->{'ip'}
		);

		$hostID = $dbh->last_insert_id( undef, undef, 'hosts', undef );
	}

	print "host " . $hosts->{$host_key}->{'host'} . ": ";

	# get backups for a host
	my @backups = $bpc->BackupInfoRead($hostname);
	my $incs = scalar @backups;
	print "$incs increments\n";

	my $inc_nr = 0;

	$beenThere = {};    # file-key cache is per host

	foreach my $backup (@backups) {

		$inc_nr++;
		last if ( $opt{m} && $inc_nr > $opt{m} );    # -m limits increments

		my $backupNum = $backup->{'num'};
		my @backupShares = ();

		printf( "%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
			$hosts->{$host_key}->{'host'},
			$inc_nr, $incs, $backupNum,
			$backup->{type}      || '?',
			$backup->{nFilesNew} || '?',
			$backup->{nFiles}    || '?',
			strftime( $t_fmt, localtime( $backup->{startTime} ) ),
			fmt_time( $backup->{endTime} - $backup->{startTime} )
		);

		my $files = BackupPC::View->new( $bpc, $hostname, \@backups, 1 );

		foreach my $share ( $files->shareList($backupNum) ) {

			my $t = time();

			$shareID = getShareID( $share, $hostID, $hostname );

			# skip if allready in database!
			$sth->{backups_count}->execute( $hostID, $backupNum, $shareID );
			my ($count) = $sth->{backups_count}->fetchrow_array();
			next if ( $count > 0 );

			# dump some log
			print curr_time, " ", $share;

			my ( $f, $nf, $d, $nd, $size ) =
				recurseDir( $bpc, $hostname, $files, $backupNum, $share, "", $shareID );

			$sth->{insert_backups}->execute(
				$hostID,
				$backupNum,
				$backup->{'endTime'},
				$backup->{'type'},
				$shareID,
				$size,
			);

			print " commit";
			$dbh->commit();

			my $dur = ( time() - $t ) || 1;
			printf( " %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
				$nf, $f, $nd, $d,
				( $size / 1024 / 1024 ),
				( ( $f + $d ) / $dur ),
				fmt_time($dur)
			);

			hest_update( $hostID, $shareID, $backupNum ) if ( $nf + $nd > 0 );
		}
	}
}

undef $sth;    # release statement handles before disconnecting
$dbh->commit();
$dbh->disconnect();

print "total duration: ", fmt_time( time() - $start_t ), "\n";

$pidfile->remove;

# Return the database ID of a share on a host, inserting it first if needed.
# (The empty () prototype on the original declaration is removed: prototypes
# do not validate args and would reject this sub's 3-argument calls.)
sub getShareID {
	my ( $share, $hostID, $hostname ) = @_;

	$sth->{share_id} ||= $dbh->prepare(qq{
		SELECT ID FROM shares
		WHERE hostID=? AND name=?
	});

	$sth->{share_id}->execute( $hostID, $share );

	my ($id) = $sth->{share_id}->fetchrow_array();

	return $id if ( defined($id) );

	$sth->{insert_share} ||= $dbh->prepare(qq{
		INSERT INTO shares
			(hostID,name,share,localpath)
		VALUES (?,?,?,?)
	});

	my $drop_down = $hostname . '/' . $share;
	$drop_down =~ s#//+#/#g;

	$sth->{insert_share}->execute( $hostID, $share, $drop_down, undef );
	return $dbh->last_insert_id( undef, undef, 'shares', undef );
}

# Check whether a file row already exists in the database; insert it when it
# does not.  Returns the number of matching rows (0 -> file was new).
# Args: ($key, @insert_data) where @insert_data matches insert_files' columns.
sub found_in_db {

	my @data = @_;
	shift @data;    # @data keeps only the insert_files bind values

	my ( $key, $shareID, undef, $name, $path, $date, undef, $size ) = @_;

	return $beenThere->{$key} if ( defined( $beenThere->{$key} ) );

	$sth->{file_in_db} ||= $dbh->prepare(qq{
		SELECT 1 FROM files
		WHERE shareID = ? and
			path = ? and
			date = ? and
			size = ?
		LIMIT 1
	});

	my @param = ( $shareID, $path, $date, $size );
	$sth->{file_in_db}->execute(@param);
	my $rows = $sth->{file_in_db}->rows;
	print STDERR "## found_in_db($shareID,$path,$date,$size) ",
		( $rows ? '+' : '-' ), join( " ", @param ), "\n"
		if ( $debug >= 3 );

	$beenThere->{$key}++;

	$sth->{'insert_files'}->execute(@data) unless ($rows);

	return $rows;
}

####################################################
# recursing through filesystem structure and       #
# and returning flattened files list               #
####################################################
# Walk one directory level of a backup, registering every entry in the
# database, then iterate over the collected sub-directories (explicit stack
# instead of direct recursion per level, to bound call depth per entry).
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size).
# (The original ($$$$$$$$) prototype is removed: it declared 8 scalars for a
# 7-parameter sub, which makes the internal recursive call fail to compile.)
sub recurseDir {

	my ( $bpc, $hostname, $files, $backupNum, $share, $dir, $shareID ) = @_;

	print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n"
		if ( $debug >= 1 );

	my ( $nr_files, $new_files, $nr_dirs, $new_dirs, $size ) = ( 0, 0, 0, 0, 0 );

	{    # scope
		my @stack;

		print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ( $debug >= 2 );
		my $filesInBackup = $files->dirAttrib( $backupNum, $share, $dir );

		# first, add all the entries in current directory
		foreach my $path_key ( keys %{$filesInBackup} ) {
			print STDERR "# file ", Dumper( $filesInBackup->{$path_key} ), "\n"
				if ( $debug >= 3 );
			my @data = (
				$shareID,
				$backupNum,
				$path_key,
				$filesInBackup->{$path_key}->{'relPath'},
				$filesInBackup->{$path_key}->{'mtime'},
				$filesInBackup->{$path_key}->{'type'},
				$filesInBackup->{$path_key}->{'size'}
			);

			# key for duplicate detection: share + path + mtime + size
			my $key = join( " ", (
				$shareID,
				$dir,
				$path_key,
				$filesInBackup->{$path_key}->{'mtime'},
				$filesInBackup->{$path_key}->{'size'}
			) );

			my $found;
			if ( !defined( $beenThere->{$key} )
				&& !( $found = found_in_db( $key, @data ) ) ) {
				print STDERR "# key: $key [", $beenThere->{$key}, "]"
					if ( $debug >= 2 );

				if ( $filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR ) {
					$new_dirs++ unless ($found);
					print STDERR " dir\n" if ( $debug >= 2 );
				}
				else {
					$new_files++ unless ($found);
					print STDERR " file\n" if ( $debug >= 2 );
				}
				$size += $filesInBackup->{$path_key}->{'size'} || 0;
			}

			if ( $filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR ) {
				$nr_dirs++;

				# remember sub-directory for the breadth pass below
				my $full_path = $dir . '/' . $path_key;
				push @stack, $full_path;
				print STDERR "### store to stack: $full_path\n" if ( $debug >= 3 );

#				my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
#
#				$nr_files += $f;
#				$new_files += $nf;
#				$nr_dirs += $d;
#				$new_dirs += $nd;
			}
			else {
				$nr_files++;
			}
		}

		print STDERR "## STACK ", join( ", ", @stack ), "\n" if ( $debug >= 2 );

		while ( my $dir = shift @stack ) {
			my ( $f, $nf, $d, $nd, $s ) =
				recurseDir( $bpc, $hostname, $files, $backupNum, $share, $dir, $shareID );
			print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ( $debug >= 1 );
			$nr_files  += $f;
			$new_files += $nf;
			$nr_dirs   += $d;
			$new_dirs  += $nd;
			$size      += $s;
		}
	}

	return ( $nr_files, $new_files, $nr_dirs, $new_dirs, $size );
}