1 package BackupPC::Search::KinoSearch;
5 use KinoSearch::Index::Indexer;
6 use KinoSearch::Plan::Schema;
7 use KinoSearch::Analysis::PolyAnalyzer;
8 use KinoSearch::Plan::FullTextType;
9 use KinoSearch::Search::IndexSearcher;
10 use Data::Dump qw(dump);
12 # my $tokenizer = KinoSearch::Analysis::Tokenizer->new( pattern => '\\w' );
# Index schema; fields are registered on it further down with spec_field.
17 my $schema = KinoSearch::Plan::Schema->new;
# Analyzer chain: a CaseFolder and a Tokenizer (both built with default
# arguments), combined in that order into a PolyAnalyzer.
# NOTE(review): no `use` line for CaseFolder or Tokenizer is visible among
# the imports shown -- presumably they are loaded indirectly; confirm.
20 my $case_folder = KinoSearch::Analysis::CaseFolder->new;
21 my $tokenizer = KinoSearch::Analysis::Tokenizer->new;
22 my $polyanalyzer = KinoSearch::Analysis::PolyAnalyzer->new(
23 analyzers => [ $case_folder, $tokenizer ],
# Field types. $ft_type runs values through the analyzer chain built above;
# $blob_type is created with stored => 1; $string_type and $num_type use
# default arguments.
# NOTE(review): only $string_type is passed to spec_field in the lines
# visible here; confirm the other three types are actually used.
26 my $ft_type = KinoSearch::Plan::FullTextType->new(
27 analyzer => $polyanalyzer,
29 my $blob_type = KinoSearch::Plan::BlobType->new( stored => 1 );
30 my $string_type = KinoSearch::Plan::StringType->new; # non-tokenized
31 my $num_type = KinoSearch::Plan::Int64Type->new;
# Register index fields on $schema: one spec_field call per name in each
# qw// list. Both visible calls use $string_type, the non-tokenized type.
# NOTE(review): _file_path_split is filled with space-separated characters
# before a document is added, yet it is declared here with the non-tokenized
# string type -- confirm that this is intended rather than $ft_type.
33 $schema->spec_field( name => $_, type => $string_type ) foreach ( qw/
43 $schema->spec_field( name => $_, type => $string_type ) foreach ( qw/
44 _uri _file_path_split filepath hname sname
47 # $schema->spec_field( name => '_doc', type => $blob_type );
# Location of the index.
# NOTE(review): hard-coded path under /tmp (already marked FIXME); presumably
# this should come from configuration -- confirm with the caller.
49 my $index_path = '/tmp/kinosearch'; # FIXME
# Indexer used for writing to the index.
51 my $indexer = KinoSearch::Index::Indexer->new(
57 warn "# using $index_path";
# NOTE(review): the KinoSearch Indexer documentation states that commit()
# invalidates the Indexer object; confirm that a fresh Indexer is created
# before add_doc is called later on.
59 $indexer->commit; # make sure that index exists
# IndexSearcher kept under the `searcher` key; it is read back as
# $self->{searcher} when looking up documents.
63 searcher => KinoSearch::Search::IndexSearcher->new(
# Build the document key as "<hname>:<sname>#<backupnum> <filepath>" from
# $row and return the number of index hits for it in the _uri field.
# NOTE(review): the key contains ':', '#' and a space and is interpolated
# into a query string; whether the query parser treats it as a single exact
# term is not visible here -- a term query object may be safer; confirm.
76 my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
78 my $hits = $self->{searcher}->hits( query => "_uri:$uri" );
79 return $hits->total_hits;
# Add two derived fields to $row and hand it to the indexer as a document.
# Note that $row is modified in place, so the caller's hash gains the _uri
# and _file_path_split keys.
# _uri uses the same "<hname>:<sname>#<backupnum> <filepath>" format as the
# lookup code; NOTE(review): the format is duplicated, keep both in sync.
85 $row->{_uri} = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
86 my $path = $row->{filepath};
# Append a space after every character of the path (newlines excepted, as
# `.` does not match them without /s), including after the last one.
87 $path =~ s/(.)/$1 /g; # XXX our tokenize
88 $row->{_file_path_split} = $path;
# Dump the prepared row to STDERR only when the DEBUG environment variable
# is set to a true value.
90 warn "XXX ",dump($row) if $ENV{DEBUG};
92 $self->{indexer}->add_doc( $row );
# Commit pending changes through the stored indexer, then log that the
# commit happened (the message is emitted after commit returns).
98 $self->{indexer}->commit;
99 warn "# commit index";