# Provenance: introduced by commit d777459c88e0a9ecf8d012f0cdffd63efe3244fe
# (Dobrica Pavlinusic, Fri, 7 Jan 2011 15:58:51 +0000),
# "begin of KinoSearch integration", as lib/BackupPC/Search/KinoSearch.pm.
package BackupPC::Search::KinoSearch;
use warnings;
use strict;

use KinoSearch::Index::Indexer;
use KinoSearch::Plan::Schema;
use KinoSearch::Analysis::PolyAnalyzer;
use KinoSearch::Plan::FullTextType;
use KinoSearch::Search::IndexSearcher;
use Data::Dump qw(dump);

# my $tokenizer = KinoSearch::Analysis::Tokenizer->new( pattern => '\\w' );

# new()
#
# Build the index schema, make sure the on-disk index exists and open a
# searcher on it.  Returns the blessed object.
#
# The object keeps the schema and the index path rather than one long-lived
# Indexer: per the KinoSearch::Index::Indexer documentation, commit()
# invalidates the Indexer, so a fresh one has to be opened for every
# indexing session (see _indexer and commit).
sub new {
    my ($class) = @_;

    my $index_path = '/tmp/kinosearch';    # FIXME

    my $self = bless {
        schema     => _build_schema(),
        index_path => $index_path,
    }, $class;

    warn "# using $index_path";

    # Commit an empty session so that the index exists before the searcher
    # is opened on it.  That indexer is finished afterwards and is dropped.
    $self->_open_indexer->commit;

    $self->{searcher} = $self->_open_searcher;

    return $self;
}

# _build_schema()
#
# Return a KinoSearch::Plan::Schema in which every known field is a
# non-tokenized string field.
# NOTE(review): backupnum, size and the date fields look numeric but are
# indexed as strings here -- confirm whether a numeric type is wanted.
sub _build_schema {
    my $schema      = KinoSearch::Plan::Schema->new;
    my $string_type = KinoSearch::Plan::StringType->new;    # non-tokenized

    $schema->spec_field( name => $_, type => $string_type ) foreach qw/
        backup_date
        backupnum
        date
        fid
        shareid
        size
        type
        _uri
        filepath
        hname
        sname
    /;

    # TODO: optionally keep the whole row in a stored blob field, e.g.
    # $schema->spec_field(
    #     name => '_doc',
    #     type => KinoSearch::Plan::BlobType->new( stored => 1 ),
    # );

    return $schema;
}

# _open_indexer()
#
# Open a new indexing session on the configured index path, creating the
# index if necessary (create => 1).
sub _open_indexer {
    my ($self) = @_;

    return KinoSearch::Index::Indexer->new(
        schema => $self->{schema},
        index  => $self->{index_path},
        create => 1,
    );
}

# _indexer()
#
# Return the indexer of the current indexing session, opening one on first
# use after new() or after the previous commit().
sub _indexer {
    my ($self) = @_;

    $self->{indexer} ||= $self->_open_indexer;
    return $self->{indexer};
}

# _open_searcher()
#
# Open a new searcher on the configured index path.
sub _open_searcher {
    my ($self) = @_;

    return KinoSearch::Search::IndexSearcher->new(
        index => $self->{index_path},
    );
}

# _uri_for($row)
#
# Build the unique document key "hname:sname#backupnum filepath" from the
# corresponding keys of the $row hash reference.
sub _uri_for {
    my ($row) = @_;

    return $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
}

# exists($row)
#
# Report whether $row is already in the index.  The lookup is not
# implemented yet, so this always returns 0.
#
# TODO(review): implement by matching the exact `_uri` term (for instance
# with KinoSearch::Search::TermQuery) instead of a parsed "_uri:..." query
# string -- the key contains spaces and ':' and would presumably not survive
# query parsing; confirm against the KinoSearch documentation.
sub exists {
    my ( $self, $row ) = @_;

    return 0;    # FIXME
}

# add_doc($row)
#
# Queue $row (hash reference of field name => value) for indexing in the
# current session.  Sets $row->{_uri} on the caller's hash as a side effect.
# Nothing is permanent until commit() is called.
sub add_doc {
    my ( $self, $row ) = @_;

    warn "XXX ", dump($row);

    $row->{_uri} = _uri_for($row);

    return $self->_indexer->add_doc($row);
}

# commit()
#
# Commit the documents queued by add_doc() and refresh the searcher so it is
# opened on the committed index.  The finished indexer is dropped; the next
# add_doc() opens a new session.  Safe to call with nothing pending.
# Returns a true value.
sub commit {
    my $self = shift;

    # delete first: the indexer must not be reused once commit was called
    my $indexer = delete $self->{indexer};
    if ($indexer) {
        $indexer->commit;

        # NOTE(review): a searcher is understood to be a point-in-time view
        # of the index, hence the reopen -- confirm for the installed version.
        $self->{searcher} = $self->_open_searcher;
    }

    warn "# commit index";
    return 1;
}

1;