From: Dobrica Pavlinusic
Date: Fri, 7 Jan 2011 12:15:23 +0000 (+0000)
Subject: begin split of full-text into own module
X-Git-Url: http://git.rot13.org/?p=BackupPC.git;a=commitdiff_plain;h=2b2ab40d38c95450034bc373a9acee8775bd53fa

begin split of full-text into own module

This will enable usage of other search engines
---

diff --git a/bin/BackupPC_updatedb b/bin/BackupPC_updatedb
index 9315026..69725af 100755
--- a/bin/BackupPC_updatedb
+++ b/bin/BackupPC_updatedb
@@ -15,6 +15,8 @@ use BackupPC::SearchLib;
 use Cwd qw/abs_path/;
 use Data::Dump qw(dump);
 
+use BackupPC::Search::Estraier;
+
 use constant BPC_FTYPE_DIR => 5;
 use constant EST_CHUNK => 4096;
 
@@ -106,37 +108,20 @@ sub curr_time {
 	return strftime($t_fmt,localtime());
 }
 
-my $hest_node;
-
 sub hest_update {
 
 	my ($host_id, $share_id, $num) = @_;
 
 	my $skip_check = $opt{j} && print STDERR "Skipping check for existing files -- this should be used only with initital import\n";
 
-	unless ($index_node_url && $index_node_url =~ m#^http://#) {
-		print STDERR "HyperEstraier support not enabled or index node invalid\n" if ($debug);
-		$index_node_url = 0;
-		return;
-	}
-
-	print curr_time," updating Hyper Estraier:";
+	print curr_time," updating fulltext:";
 
 	my $t = time();
 
 	my $offset = 0;
 	my $added = 0;
 
-	if ($index_node_url) {
-		print " opening index $index_node_url";
-		$hest_node ||= Search::Estraier::Node->new(
-			url => $index_node_url,
-			user => 'admin',
-			passwd => 'admin',
-			croak_on_error => 1,
-		);
-		print " via node URL";
-	}
+	my $search = BackupPC::Search::Estraier->new( $index_node_url );
 
 	my $results = 0;
 
@@ -196,37 +181,8 @@ sub hest_update {
 		}
 
 		while (my $row = $sth->fetchrow_hashref()) {
-
-			my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
-			if (! $skip_check && $hest_node) {
-				my $id = $hest_node->uri_to_id($uri);
-				next if ($id && $id == -1);
-			}
-
-			# create a document object
-			my $doc = Search::Estraier::Document->new;
-
-			# add attributes to the document object
-			$doc->add_attr('@uri', $uri);
-
-			foreach my $c (@{ $sth->{NAME} }) {
-				print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
-				$doc->add_attr($c, $row->{$c}) if (defined($row->{$c}));
-			}
-
-			#$doc->add_attr('@cdate', fmt_date($row->{'date'}));
-
-			# add the body text to the document object
-			my $path = $row->{'filepath'};
-			$doc->add_text($path);
-			$path =~ s/(.)/$1 /g;
-			$doc->add_hidden_text($path);
-
-			print STDERR $doc->dump_draft,"\n" if ($debug > 1);
-
-			# register the document object to the database
-			$hest_node->put_doc($doc) if ($hest_node);
-
+			next if $search->exists( $row );
+			$search->add_doc( $row );
 			$added++;
 		}
 
@@ -541,7 +497,6 @@ foreach my $host_key (@hosts) {
 	$host_nr++;
 	# get backups for a host
 	my @backups = $bpc->BackupInfoRead($hostname);
-warn "XXXX ",dump(@backups);
 	my $incs = scalar @backups;
 
 	my $host_header = sprintf("host %s [%d/%d]: %d increments\n",
diff --git a/lib/BackupPC/Search/Estraier.pm b/lib/BackupPC/Search/Estraier.pm
new file mode 100644
index 0000000..ff2b287
--- /dev/null
+++ b/lib/BackupPC/Search/Estraier.pm
@@ -0,0 +1,70 @@
+package BackupPC::Search::Estraier;
+use warnings;
+use strict;
+
+use Search::Estraier;
+
+my $debug = $ENV{DEBUG} || 0;
+
+sub new {
+	my ( $class, $index_node_url ) = @_;
+
+	warn "# using $index_node_url";
+
+	my $self = bless {
+		node => Search::Estraier::Node->new(
+			url => $index_node_url,
+			user => 'admin',
+			passwd => 'admin',
+			croak_on_error => 1,
+		),
+	}, $class;
+	return $self;
+}
+
+sub node { $_[0]->{node} };
+
+sub exists {
+	my ( $self, $row ) = @_;
+
+	my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
+	my $id = $self->node->uri_to_id($uri);
+	return $id && $id != -1;
+}
+
+sub add_doc {
+	my ( $self, $row ) = @_;
+
+	# create a document object
+	my $doc = Search::Estraier::Document->new;
+
+	my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
+	# add attributes to the document object
+	$doc->add_attr('@uri', $uri);
+
+	foreach my $c (keys %$row) {
+		print STDERR "attr $c = $row->{$c}\n" if ($debug > 2);
+		$doc->add_attr($c, $row->{$c}) if defined($row->{$c});
+	}
+
+	#$doc->add_attr('@cdate', fmt_date($row->{'date'}));
+
+	# add the body text to the document object
+	my $path = $row->{'filepath'};
+	$doc->add_text($path);
+	$path =~ s/(.)/$1 /g;
+	$doc->add_hidden_text($path);
+
+	print STDERR $doc->dump_draft,"\n" if ($debug > 1);
+
+	# register the document object to the database
+	$self->node->put_doc($doc);
+
+}
+
+sub commit {
+	my $self = shift;
+	warn "# commit not needed";
+}
+
+1;