r8411@llin: dpavlin | 2005-10-10 14:06:48 +0200
[BackupPC.git] / bin / BackupPC_updatedb
index e433555..8336cc0 100755 (executable)
@@ -11,8 +11,10 @@ use Getopt::Std;
 use Time::HiRes qw/time/;
 use File::Pid;
 use POSIX qw/strftime/;
 use Time::HiRes qw/time/;
 use File::Pid;
 use POSIX qw/strftime/;
+use BackupPC::SearchLib;
 
 use constant BPC_FTYPE_DIR => 5;
 
 use constant BPC_FTYPE_DIR => 5;
+use constant EST_CHUNK => 100000;
 
 my $debug = 0;
 $|=1;
 
 my $debug = 0;
 $|=1;
@@ -40,16 +42,15 @@ my $beenThere = {};
 
 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
 my $user = $Conf{SearchUser} || '';
 
 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
 my $user = $Conf{SearchUser} || '';
-my $index_path = $Conf{HyperEstraierIndex};
-$index_path = $TopDir . '/' . $index_path;
-$index_path =~ s#//#/#g;
 
 
+my $use_hest = $Conf{HyperEstraierIndex};
+my ($index_path, $index_node_url) = BackupPC::SearchLib::getHyperEstraier_url($use_hest);
 
 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
 
 my %opt;
 
 
 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
 
 my %opt;
 
-if ( !getopts("cdm:v:i", \%opt ) ) {
+if ( !getopts("cdm:v:ij", \%opt ) ) {
        print STDERR <<EOF;
 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]
 
        print STDERR <<EOF;
 usage: $0 [-c|-d] [-m num] [-v|-v level] [-i]
 
@@ -58,7 +59,12 @@ Options:
        -d      delete database before import
        -m num  import just num increments for one host
        -v num  set verbosity (debug) level (default $debug)
        -d      delete database before import
        -m num  import just num increments for one host
        -v num  set verbosity (debug) level (default $debug)
-       -i      update HyperEstraier full text index
+       -i      update Hyper Estraier full text index
+       -j      update full text, don't check existing files
+
+Option -j is variation on -i. It will allow faster initial creation
+of full-text index from existing database.
+
 EOF
        exit 1;
 }
 EOF
        exit 1;
 }
@@ -84,129 +90,163 @@ sub curr_time {
 }
 
 my $hest_db;
 }
 
 my $hest_db;
+my $hest_node;
+
+sub signal {
+       my($sig) = @_;
+       if ($hest_db) {
+               print "\nCaught a SIG$sig--syncing database and shutting down\n";
+               $hest_db->sync();
+               $hest_db->close();
+       }
+       exit(0);
+}
+
+$SIG{'INT'}  = \&signal;
+$SIG{'QUIT'} = \&signal;
 
 sub hest_update {
 
        my ($host_id, $share_id, $num) = @_;
 
 
 sub hest_update {
 
        my ($host_id, $share_id, $num) = @_;
 
-       print curr_time," updating HyperEstraier: select files";
-
-       my $t = time();
-
-       my $where = '';
-       if ($host_id && $share_id && $num) {
-               $where = qq{
-               WHERE
-                       hosts.id = ? AND
-                       shares.id = ? AND
-                       files.backupnum = ?
-               };
-       }
-
-       my $sth = $dbh->prepare(qq{
-               SELECT
-                       files.id                        AS fid,
-                       hosts.name                      AS hname,
-                       shares.name                     AS sname,
-                       -- shares.share                 AS sharename,
-                       files.backupnum                 AS backupnum,
-                       -- files.name                   AS filename,
-                       files.path                      AS filepath,
-                       files.date                      AS date,
-                       files.type                      AS type,
-                       files.size                      AS size,
-                       files.shareid                   AS shareid,
-                       backups.date                    AS backup_date
-               FROM files 
-                       INNER JOIN shares       ON files.shareID=shares.ID
-                       INNER JOIN hosts        ON hosts.ID = shares.hostID
-                       INNER JOIN backups      ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
-               $where
-       });
+       my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
 
 
-       $sth->execute(@_);
-       my $results = $sth->rows;
-
-       if ($results == 0) {
-               print " - no files, skipping\n";
+       unless ($use_hest) {
+               print STDERR "HyperEstraier support not enabled in configuration\n";
                return;
        }
 
                return;
        }
 
-       my $dot = int($results / 15) || 1;
+       print curr_time," updating HyperEstraier:";
+
+       my $t = time();
 
 
-       print " $results ($dot/#)";
+       my $offset = 0;
+       my $added = 0;
 
 
-       sub fmt_date {
-               my $t = shift || return;
-               my $iso = BackupPC::Lib::timeStamp($t);
-               $iso =~ s/\s/T/;
-               return $iso;
+       print " opening index $use_hest";
+       if ($index_path) {
+               $hest_db = HyperEstraier::Database->new();
+               $hest_db->open($TopDir . $index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
+               print " directly";
+       } elsif ($index_node_url) {
+               $hest_node ||= HyperEstraier::Node->new($index_node_url);
+               $hest_node->set_auth('admin', 'admin');
+               print " via node URL";
+       } else {
+               die "don't know how to use HyperEstraier Index $use_hest";
        }
        }
+       print " increment is " . EST_CHUNK . " files:";
+
+       my $results = 0;
+
+       do {
+
+               my $where = '';
+               my @data;
+               if ($host_id && $share_id && $num) {
+                       $where = qq{
+                       WHERE
+                               hosts.id = ? AND
+                               shares.id = ? AND
+                               files.backupnum = ?
+                       };
+                       @data = ( $host_id, $share_id, $num );
+               }
 
 
-       my $max = int($results / $dot);
+               my $limit = sprintf('LIMIT '.EST_CHUNK.' OFFSET %d', $offset);
+
+               my $sth = $dbh->prepare(qq{
+                       SELECT
+                               files.id                        AS fid,
+                               hosts.name                      AS hname,
+                               shares.name                     AS sname,
+                               -- shares.share                 AS sharename,
+                               files.backupnum                 AS backupnum,
+                               -- files.name                   AS filename,
+                               files.path                      AS filepath,
+                               files.date                      AS date,
+                               files.type                      AS type,
+                               files.size                      AS size,
+                               files.shareid                   AS shareid,
+                               backups.date                    AS backup_date
+                       FROM files 
+                               INNER JOIN shares       ON files.shareID=shares.ID
+                               INNER JOIN hosts        ON hosts.ID = shares.hostID
+                               INNER JOIN backups      ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
+                       $where
+                       $limit
+               });
+
+               $sth->execute(@data);
+               $results = $sth->rows;
+
+               if ($results == 0) {
+                       print " - no new files\n";
+                       last;
+               }
 
 
-       print ", opening index $index_path...";
-       use HyperEstraier;
-       my $db = HyperEstraier::Database->new();
+               sub fmt_date {
+                       my $t = shift || return;
+                       my $iso = BackupPC::Lib::timeStamp($t);
+                       $iso =~ s/\s/T/;
+                       return $iso;
+               }
 
 
-#      unless ($hest_db) {
-#              print " open reader";
-#              $hest_db = HyperEstraier::Database->new();
-#
-#      }
+               while (my $row = $sth->fetchrow_hashref()) {
 
 
+                       my $fid = $row->{'fid'} || die "no fid?";
+                       my $uri = 'file:///' . $fid;
 
 
-       $db->open($index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);
+                       unless ($skip_check) {
+                               my $id = ($hest_db || $hest_node)->uri_to_id($uri);
+                               next unless ($id == -1);
+                       }
 
 
-       my $added = 0;
+                       # create a document object 
+                       my $doc = HyperEstraier::Document->new;
 
 
-       while (my $row = $sth->fetchrow_hashref()) {
+                       # add attributes to the document object 
+                       $doc->add_attr('@uri', $uri);
 
 
-               my $fid = $row->{'fid'} || die "no fid?";
-               my $uri = 'file:///' . $fid;
+                       foreach my $c (@{ $sth->{NAME} }) {
+                               $doc->add_attr($c, $row->{$c}) if ($row->{$c});
+                       }
 
 
-               my $id = $db->uri_to_id($uri);
-               next unless ($id == -1);
+                       #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
 
 
-               # create a document object 
-               my $doc = HyperEstraier::Document->new;
+                       # add the body text to the document object 
+                       my $path = $row->{'filepath'};
+                       $doc->add_text($path);
+                       $path =~ s/(.)/$1 /g;
+                       $doc->add_hidden_text($path);
 
 
-               # add attributes to the document object 
-               $doc->add_attr('@uri', $uri);
+                       print STDERR $doc->dump_draft,"\n" if ($debug > 1);
 
 
-               foreach my $c (@{ $sth->{NAME} }) {
-                       $doc->add_attr($c, $row->{$c}) if ($row->{$c});
+                       # register the document object to the database
+                       if ($hest_db) {
+                               $hest_db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
+                       } elsif ($hest_node) {
+                               $hest_node->put_doc($doc);
+                       } else {
+                               die "not supported";
+                       }
+                       $added++;
                }
 
                }
 
-               #$doc->add_attr('@cdate', fmt_date($row->{'date'}));
-
-               # add the body text to the document object 
-               my $path = $row->{'filepath'};
-               $doc->add_text($path);
-               $path =~ s/(.)/$1 /g;
-               $doc->add_hidden_text($path);
+               print " $added";
+               $hest_db->sync() if ($index_path);
 
 
-               print STDERR $doc->dump_draft,"\n" if ($debug > 1);
+               $offset += EST_CHUNK;
 
 
-               # register the document object to the database
-               $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);
-
-               $added++;
-               if ($added % $dot == 0) {
-                       print "$max ";
-                       $max--;
-               }
+       } while ($results == EST_CHUNK);
 
 
+       if ($index_path) {
+               print ", close";
+               $hest_db->close();
        }
 
        }
 
-       print "sync $added new files";
-       $db->sync();
-       print ", close";
-       $db->close();
-
        my $dur = (time() - $t) || 1;
        my $dur = (time() - $t) || 1;
-       printf(" [%.2f/s new %.2f/s dur: %s]\n",
-               ( $results / $dur ),
+       printf(" [%.2f/s dur: %s]\n",
                ( $added / $dur ),
                fmt_time($dur)
        );
                ( $added / $dur ),
                fmt_time($dur)
        );
@@ -216,11 +256,12 @@ sub hest_update {
 
 
 ## update index ##
 
 
 ## update index ##
-if (($opt{i} || ($index_path && ! -e $index_path)) && !$opt{c}) {
+if (($opt{i} || $opt{j} || ($index_path && ! -e $index_path)) && !$opt{c}) {
        # update all
        print "force update of HyperEstraier index ";
        print "importing existing data" unless (-e $index_path);
        print "by -i flag" if ($opt{i});
        # update all
        print "force update of HyperEstraier index ";
        print "importing existing data" unless (-e $index_path);
        print "by -i flag" if ($opt{i});
+       print "by -j flag" if ($opt{j});
        print "\n";
        hest_update();
 }
        print "\n";
        hest_update();
 }
@@ -229,9 +270,10 @@ if (($opt{i} || ($index_path && ! -e $index_path)) && !$opt{c}) {
 if ($opt{c}) {
        sub do_index {
                my $index = shift || return;
 if ($opt{c}) {
        sub do_index {
                my $index = shift || return;
-               my ($table,$col,$unique) = split(/_/, $index);
+               my ($table,$col,$unique) = split(/:/, $index);
                $unique ||= '';
                $unique ||= '';
-               $index =~ s/,/_/g;
+               $index =~ s/\W+/_/g;
+               print "$index on $table($col)" . ( $unique ? "u" : "" ) . " ";
                $dbh->do(qq{ create $unique index $index on $table($col) });
        }
 
                $dbh->do(qq{ create $unique index $index on $table($col) });
        }
 
@@ -250,65 +292,97 @@ if ($opt{c}) {
                        ID      SERIAL          PRIMARY KEY,
                        hostID  INTEGER         NOT NULL references hosts(id),
                        name    VARCHAR(30)     NOT NULL,
                        ID      SERIAL          PRIMARY KEY,
                        hostID  INTEGER         NOT NULL references hosts(id),
                        name    VARCHAR(30)     NOT NULL,
-                       share   VARCHAR(200)    NOT NULL,
-                       localpath VARCHAR(200)      
+                       share   VARCHAR(200)    NOT NULL
                );            
        });
                );            
        });
+
+       $dbh->do(qq{
+               create table dvds (
+                       ID      SERIAL          PRIMARY KEY, 
+                       num     INTEGER         NOT NULL,
+                       name    VARCHAR(255)    NOT NULL,
+                       mjesto  VARCHAR(255)
+               );
+       });
        
        $dbh->do(qq{
                create table backups (
        
        $dbh->do(qq{
                create table backups (
+                       id      serial,
                        hostID  INTEGER         NOT NULL references hosts(id),
                        num     INTEGER         NOT NULL,
                        date    integer         NOT NULL, 
                        type    CHAR(4)         not null,
                        shareID integer         not null references shares(id),
                        hostID  INTEGER         NOT NULL references hosts(id),
                        num     INTEGER         NOT NULL,
                        date    integer         NOT NULL, 
                        type    CHAR(4)         not null,
                        shareID integer         not null references shares(id),
-                       size    integer         not null,
-                       PRIMARY KEY(hostID, num, shareID) 
+                       size    bigint          not null,
+                       inc_size bigint         not null default -1,
+                       inc_deleted boolean     default false,
+                       PRIMARY KEY(id)
                );            
        });
 
                );            
        });
 
-       #do_index('backups_hostid,num_unique');
-
-       $dbh->do(qq{
-               create table dvds (
-                       ID      SERIAL          PRIMARY KEY, 
-                       num     INTEGER         NOT NULL,
-                       name    VARCHAR(255)    NOT NULL,
-                       mjesto  VARCHAR(255)
+       $dbh->do(qq{     
+               create table files (
+                       ID              SERIAL,
+                       shareID         INTEGER NOT NULL references shares(id),
+                       backupNum       INTEGER NOT NULL,
+                       name            VARCHAR(255) NOT NULL,
+                       path            VARCHAR(255) NOT NULL,
+                       date            integer NOT NULL,
+                       type            INTEGER NOT NULL,
+                       size            bigint  NOT NULL,
+                       primary key(id)
                );
        });
 
                );
        });
 
-       $dbh->do(qq{     
-               create table files (
-                       ID      SERIAL          PRIMARY KEY,  
-                       shareID INTEGER         NOT NULL references shares(id),
-                       backupNum  INTEGER      NOT NULL,
-                       name       VARCHAR(255) NOT NULL,
-                       path       VARCHAR(255) NOT NULL,
-                       date       integer      NOT NULL,
-                       type       INTEGER      NOT NULL,
-                       size       INTEGER      NOT NULL,
-                       dvdid      INTEGER      references dvds(id)     
+
+       $dbh->do( qq{
+               create table archive (
+                       id              serial,
+                       dvd_nr          int not null,
+                       total_size      bigint default -1,
+                       note            text,
+                       username        varchar(20) not null,
+                       date            timestamp default now(),
+                       primary key(id)
+               );      
+       }
+       );
+
+       $dbh->do( qq{
+               create table archive_backup
+               (
+                       archive_id      int not null references archive(id),
+                       backup_id       int not null references backups(id),
+                       primary key(archive_id, backup_id)
                );
        });
 
                );
        });
 
-       print "creating indexes:";
+       print "creating indexes: ";
 
        foreach my $index (qw(
 
        foreach my $index (qw(
-               hosts_name
-               backups_hostID
-               backups_num
-               shares_hostID
-               shares_name
-               files_shareID
-               files_path
-               files_name
-               files_date
-               files_size
+               hosts:name
+               backups:hostID
+               backups:num
+               backups:shareID
+               shares:hostID
+               shares:name
+               files:shareID
+               files:path
+               files:name
+               files:date
+               files:size
+               archive:dvd_nr
        )) {
        )) {
-               print " $index";
                do_index($index);
        }
                do_index($index);
        }
+
+       print " creating sequence: ";
+       foreach my $seq (qw/dvd_nr/) {
+               print "$seq ";
+               $dbh->do( qq{ CREATE SEQUENCE $seq } );
+       }
+
+
        print "...\n";
 
        $dbh->commit;
        print "...\n";
 
        $dbh->commit;
@@ -424,7 +498,7 @@ foreach my $host_key (keys %{$hosts}) {
                                $hostID,
                                $backupNum,
                                $backup->{'endTime'},
                                $hostID,
                                $backupNum,
                                $backup->{'endTime'},
-                               $backup->{'type'},
+                               substr($backup->{'type'},0,4),
                                $shareID,
                                $size,
                        );
                                $shareID,
                                $size,
                        );
@@ -440,7 +514,7 @@ foreach my $host_key (keys %{$hosts}) {
                                fmt_time($dur)
                        );
 
                                fmt_time($dur)
                        );
 
-                       hest_update($hostID, $shareID, $backupNum);
+                       hest_update($hostID, $shareID, $backupNum) if ($nf + $nd > 0);
                }
 
        }
                }
 
        }
@@ -469,14 +543,14 @@ sub getShareID() {
 
        $sth->{insert_share} ||= $dbh->prepare(qq{
                INSERT INTO shares 
 
        $sth->{insert_share} ||= $dbh->prepare(qq{
                INSERT INTO shares 
-                       (hostID,name,share,localpath
-               VALUES (?,?,?,?)
+                       (hostID,name,share) 
+               VALUES (?,?,?)
        });
 
        my $drop_down = $hostname . '/' . $share;
        $drop_down =~ s#//+#/#g;
 
        });
 
        my $drop_down = $hostname . '/' . $share;
        $drop_down =~ s#//+#/#g;
 
-       $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
+       $sth->{insert_share}->execute($hostID,$share, $drop_down);
        return $dbh->last_insert_id(undef,undef,'shares',undef);
 }
 
        return $dbh->last_insert_id(undef,undef,'shares',undef);
 }