package BackupPC::Search::KinoSearch;
use warnings;
use strict;

use KinoSearch::Index::Indexer;
use KinoSearch::Plan::Schema;
use KinoSearch::Analysis::PolyAnalyzer;
use KinoSearch::Plan::FullTextType;
use KinoSearch::Search::IndexSearcher;
use KinoSearch::Search::TermQuery;
use Data::Dump qw(dump);

# my $tokenizer = KinoSearch::Analysis::Tokenizer->new( pattern => '\\w' );

# new( KinoPath => $dir, ... )
#
# Constructor. Takes a flat key/value configuration list; only KinoPath
# (the index directory) is read. Dies with "no KinoPath" when it is missing
# or false. Records whether the index directory was absent at construction
# time so that exists() can skip lookups against an index that is not there.
sub new {
    my $class = shift @_;
    my %Conf  = @_;

    my $index_path = $Conf{KinoPath} || die "no KinoPath";

    my $self = bless {
        index               => $index_path,
        first_time_indexing => !-d $index_path,
    }, $class;

    warn "# ", dump($self);
    return $self;
}

# _uri_for( $row )
#
# Private helper: builds the per-file key "hname:sname#backupnum filepath"
# stored in the _uri field. Shared by exists() and add_doc() so the lookup
# key and the stored key are always built the same way.
sub _uri_for {
    my ($row) = @_;
    return $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
}

# indexer()
#
# Returns the cached KinoSearch indexer for this object, creating it (and the
# schema) on first use. All fields are declared as non-tokenized string
# fields. The index is opened with create => 1.
sub indexer {
    my $self = shift;
    return $self->{_indexer} if defined $self->{_indexer};

    my $schema      = KinoSearch::Plan::Schema->new;
    my $string_type = KinoSearch::Plan::StringType->new;    # non-tokenized

    # NOTE(review): this sub used to build a CaseFolder+Tokenizer
    # PolyAnalyzer / FullTextType (plus Blob and Int64 types) that were never
    # attached to any field. _file_path_split looks like it was meant to be a
    # full-text field -- confirm before changing the schema.
    $schema->spec_field( name => $_, type => $string_type )
        foreach qw/
            backup_date backupnum date fid shareid size type
            _uri _file_path_split filepath hname sname
        /;

    # $schema->spec_field( name => '_doc', type => $blob_type );

    my $indexer = KinoSearch::Index::Indexer->new(
        schema => $schema,
        index  => $self->{index},
        create => 1,
    );

    warn "# created indexer";

    return $self->{_indexer} = $indexer;
}

# Package global kept for backward compatibility; nothing in this file uses it.
our $searcher;

# searcher()
#
# Returns the cached IndexSearcher for the index directory, opening it on
# first use.
sub searcher {
    my $self = shift;
    return $self->{_searcher} if $self->{_searcher};
    return $self->{_searcher} = KinoSearch::Search::IndexSearcher->new( index => $self->{index} );
}

# exists( $row )
#
# Returns the number of indexed documents whose _uri equals the key built
# from $row (hname, sname, backupnum, filepath). Always returns 0 while the
# index has not been created yet. The hit count is tallied under
# $self->{stats}{exists}{<count>} so it shows up in the commit() dump.
sub exists {
    my ( $self, $row ) = @_;
    return 0 if $self->{first_time_indexing};

    # _uri is a non-tokenized field and the key contains a space and colons,
    # so look it up as one exact term instead of handing a string to the
    # query parser.
    my $query = KinoSearch::Search::TermQuery->new(
        field => '_uri',
        term  => _uri_for($row),
    );

    my $total = $self->searcher->hits( query => $query )->total_hits;
    $self->{stats}->{exists}->{$total}++;
    return $total;
}

# add_doc( $row )
#
# Adds $row (a hash ref of field => value) to the index. Two derived fields
# are written into $row before indexing: _uri (see _uri_for) and
# _file_path_split (filepath with a space inserted after every character).
# Note that $row is modified in place.
sub add_doc {
    my ( $self, $row ) = @_;

    $row->{_uri} = _uri_for($row);

    my $path = $row->{filepath};
    $path =~ s/(.)/$1 /g;    # XXX our tokenize
    $row->{_file_path_split} = $path;

    warn "XXX ", dump($row) if $ENV{DEBUG};

    $self->{stats}->{add_doc}++;

    $self->indexer->add_doc($row);
}

# commit()
#
# Commits pending documents and dumps the collected statistics to STDERR.
# Afterwards the cached indexer and searcher are dropped: a KinoSearch
# indexer cannot be reused after commit(), and a searcher opened earlier
# does not see the newly committed data. The first-time flag is cleared as
# well, since the index now exists and exists() must really look things up.
sub commit {
    my $self = shift;

    $self->indexer->commit;

    delete $self->{_indexer};
    delete $self->{_searcher};
    $self->{first_time_indexing} = 0;

    warn "# commit index ", dump( $self->{stats} );
}

# search( $offset, $on_page, $sort, $q, $shareid,
#         $backup_from, $backup_to, $files_from, $files_to )
#
# Runs query string $q against the index and returns
# ( total_hits, \@hit_objects ).
#
#   $offset   - number of leading hits to skip
#               NOTE(review): treated as a row offset; confirm callers do
#               not pass a page index.
#   $on_page  - maximum number of hits to return
#   $sort     - "<field>_a" sorts ascending on <field>, any other suffix
#               sorts descending; empty/undef sorts by score
#
# The share and date-range parameters are accepted and logged but are not
# applied as filters here.
sub search {
    my ( $self, $offset, $on_page, $sort, $q, $shareid, $backup_from, $backup_to, $files_from, $files_to ) = @_;

    {
        no warnings 'uninitialized';    # optional parameters are frequently undef
        warn "# search $offset/$on_page [$q] shareid: $shareid backup: $backup_from - $backup_to files: $files_from - $files_to";
    }

    my $sort_field = defined $sort ? ( split( /_/, $sort, 2 ) )[0] : undef;

    my $rule = $sort_field
        ? KinoSearch::Search::SortRule->new(
            field   => $sort_field,
            reverse => ( $sort =~ m/_a$/ ? 0 : 1 ),
        )
        : KinoSearch::Search::SortRule->new( type => 'score' );

    my $sort_spec = KinoSearch::Search::SortSpec->new( rules => [$rule] );

    # $q =~ s/(.)/$1 /g;

    my %hits_args = (
        query     => $q,
        sort_spec => $sort_spec,
    );

    # Forward the paging window; without it every call returns the
    # searcher's default first page regardless of what was requested.
    $hits_args{offset}     = $offset  if $offset  && $offset > 0;
    $hits_args{num_wanted} = $on_page if $on_page && $on_page > 0;

    my $hits  = $self->searcher->hits(%hits_args);
    my $total = $hits->total_hits;

    warn "# ", $total, " hits for $q\n";

    return ( 0, [] ) if $total == 0;

    my @results;
    while ( my $hit = $hits->next ) {
        warn "XXX ", dump($hit) if $ENV{DEBUG};
        push @results, $hit;
    }

    return ( $total, \@results );
}

1;