begin of KinoSearch integration
author Dobrica Pavlinusic <dpavlin@rot13.org>
Fri, 7 Jan 2011 15:58:51 +0000 (15:58 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Fri, 7 Jan 2011 15:58:51 +0000 (15:58 +0000)
lib/BackupPC/Search/KinoSearch.pm [new file with mode: 0644]

diff --git a/lib/BackupPC/Search/KinoSearch.pm b/lib/BackupPC/Search/KinoSearch.pm
new file mode 100644 (file)
index 0000000..d7f7868
--- /dev/null
@@ -0,0 +1,94 @@
+package BackupPC::Search::KinoSearch;
+use warnings;
+use strict;
+
+use KinoSearch::Index::Indexer;
+use KinoSearch::Plan::Schema;
+use KinoSearch::Analysis::PolyAnalyzer;
+use KinoSearch::Plan::FullTextType;
+use KinoSearch::Search::IndexSearcher;
+use Data::Dump qw(dump);
+
+# my $tokenizer = KinoSearch::Analysis::Tokenizer->new( pattern => '\\w' );
+
+# Construct a search backend bound to a KinoSearch index on disk.
+#
+# Optional arguments (name => value pairs following the class name):
+#   index_path - directory holding the index; defaults to /tmp/kinosearch
+#
+# Any other argument list is ignored, so existing callers that pass nothing
+# (or something that is not a name/value list) keep working unchanged.
+#
+# The constructor opens an Indexer and commits it once so that the index
+# exists before the IndexSearcher is opened on it.  That Indexer is then
+# dropped rather than stored: KinoSearch documents that commit() invalidates
+# the Indexer, so a new one is opened on demand by _indexer().
+sub new {
+       my ( $class, @args ) = @_;
+
+       # Only treat the arguments as options when they form complete pairs.
+       my %opt = @args % 2 ? () : @args;
+       my $index_path = defined $opt{index_path}
+               ? $opt{index_path}
+               : '/tmp/kinosearch'; # FIXME should come from configuration
+
+       my $schema = KinoSearch::Plan::Schema->new;
+       my $string_type = KinoSearch::Plan::StringType->new; # non-tokenized
+
+       # Every field is currently indexed as a plain, non-tokenized string.
+       foreach my $field ( qw/
+               backup_date
+               backupnum
+               date
+               fid
+               shareid
+               size
+               type
+               _uri filepath hname sname
+       / ) {
+               $schema->spec_field( name => $field, type => $string_type );
+       }
+
+#      $schema->spec_field( name => '_doc', type => $blob_type );
+
+       my $self = bless {
+               schema     => $schema,
+               index_path => $index_path,
+       }, $class;
+
+       my $bootstrap = $self->_indexer;
+
+       warn "# using $index_path";
+
+       $bootstrap->commit; # make sure that index exists
+       delete $self->{indexer}; # committed Indexer must not be reused
+
+       $self->{searcher} = KinoSearch::Search::IndexSearcher->new(
+               index => $index_path,
+       );
+
+       return $self;
+}
+
+# Return the Indexer for the current indexing session, opening a new one
+# (with the schema and index path remembered by new) if none is open.
+sub _indexer {
+       my ($self) = @_;
+
+       $self->{indexer} ||= KinoSearch::Index::Indexer->new(
+               schema => $self->{schema},
+               index  => $self->{index_path},
+               create => 1,
+       );
+
+       return $self->{indexer};
+}
+
+# Build the value stored in the _uri field for a row:
+#   "<hname>:<sname>#<backupnum> <filepath>"
+sub _uri_for {
+       my ($row) = @_;
+       return $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
+}
+
+# Report whether $row is already present in the index.
+#
+# The lookup is currently disabled: the sub always returns 0 and the code
+# after the early return is never executed.
+sub exists {
+       my ($self,$row) = @_;
+
+       return 0; # FIXME
+
+       my $uri = _uri_for($row);
+       warn "# exists $uri";
+       my $hits = $self->{searcher}->hits( query => "_uri:$uri" );
+       return $hits->total_hits;
+}
+
+# Queue $row (a hash reference of field name => value) for indexing.
+#
+# Side effect: the computed _uri is stored into the caller's $row before it
+# is handed to the Indexer.  Nothing is permanent until commit() is called.
+sub add_doc {
+       my ($self,$row) = @_;
+
+       warn "XXX ",dump($row);
+
+       $row->{_uri} = _uri_for($row);
+
+       $self->_indexer->add_doc( $row );
+
+}
+
+# Commit documents queued by add_doc() since the previous commit.
+#
+# If no Indexer is open there is nothing pending and only the diagnostic is
+# emitted.  After a real commit the Indexer is discarded (it cannot be used
+# again) and the searcher is reopened, because an IndexSearcher only sees
+# the state of the index at the time it was opened.
+sub commit {
+       my $self = shift;
+
+       if ( my $indexer = delete $self->{indexer} ) {
+               $indexer->commit;
+               $self->{searcher} = KinoSearch::Search::IndexSearcher->new(
+                       index => $self->{index_path},
+               );
+       }
+
+       warn "# commit index";
+}
+
+1;