1 package BackupPC::Search::KinoSearch;
5 use KinoSearch::Index::Indexer;
6 use KinoSearch::Plan::Schema;
7 use KinoSearch::Analysis::PolyAnalyzer;
8 use KinoSearch::Plan::FullTextType;
9 use KinoSearch::Search::IndexSearcher;
10 use Data::Dump qw(dump);
12 # my $tokenizer = KinoSearch::Analysis::Tokenizer->new( pattern => '\\w' );
18 my $index_path = $Conf{KinoPath} || die "no KinoPath";
22 first_time_indexing => ! -d $index_path,
23 numeric_padding => [ qw(
30 warn "# ",dump($self);
36 return $self->{_indexer} if defined $self->{_indexer};
38 my $schema = KinoSearch::Plan::Schema->new;
41 my $case_folder = KinoSearch::Analysis::CaseFolder->new;
42 my $tokenizer = KinoSearch::Analysis::Tokenizer->new;
43 my $polyanalyzer = KinoSearch::Analysis::PolyAnalyzer->new(
44 analyzers => [ $case_folder, $tokenizer ],
47 my $ft_type = KinoSearch::Plan::FullTextType->new(
48 analyzer => $polyanalyzer,
50 my $blob_type = KinoSearch::Plan::BlobType->new( stored => 1 );
51 my $string_type = KinoSearch::Plan::StringType->new; # non-tokenized
52 my $num_type = KinoSearch::Plan::Int64Type->new( sortable => 1 );
53 my $sort_type = KinoSearch::Plan::StringType->new( sortable => 1 ); # non-tokenized
56 $schema->spec_field( name => $_, type => $string_type ) foreach ( qw/
63 # non-tokenized strings
64 $schema->spec_field( name => $_, type => $string_type ) foreach ( qw/
70 $schema->spec_field( name => $_, type => $sort_type ) foreach (qw/
76 $schema->spec_field( name => $_, type => $sort_type ) foreach (qw/
82 # tokenized magic columns for infix search
83 $schema->spec_field( name => '_file_path_split', type => $ft_type );
85 # $schema->spec_field( name => '_doc', type => $blob_type );
87 my $indexer = KinoSearch::Index::Indexer->new(
89 index => $self->{index},
93 warn "# created indexer";
95 return $self->{_indexer} = $indexer;
102 return $self->{_searcher} if $self->{_searcher};
104 KinoSearch::Search::IndexSearcher->new( index => $self->{index} )
108 my ($self,$row) = @_;
110 return 0 if $self->{first_time_indexing};
112 my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
113 my $hits = $self->searcher->hits( query => "_uri:$uri" );
116 $self->{stat}->{exists}->{ $hits->total_hits }++;
118 return $hits->total_hits;
# Format a value as a zero-padded, 11-character decimal string.
#
# Used for the fields listed in $self->{numeric_padding}, both when a row is
# indexed and when lower/upper range terms are built, so that both sides are
# encoded identically.
#
#   $_[0] - value to pad; it is formatted with %d. undef is treated as 0.
#
# Returns the padded string, e.g. 42 becomes "00000000042".
# Note: a negative value keeps its leading '-' inside the 11 characters.
sub _numeric_padding {
    my ($value) = @_;

    # An undefined column already formatted as zero, but only after an
    # "uninitialized value" warning; make that fallback explicit.
    $value = 0 unless defined $value;

    return sprintf '%011d', $value;
}
124 my ($self,$row) = @_;
126 $row->{_uri} = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
127 my $path = $row->{filepath};
128 $path =~ s/(.)/$1 /g; # XXX our tokenize
129 $row->{_file_path_split} = $path;
131 $self->{stats}->{add_doc}++;
133 foreach my $col ( @{ $self->{numeric_padding} } ) {
134 $row->{$col} = _numeric_padding $row->{$col};
137 warn "XXX ",dump($row) if $ENV{DEBUG};
139 $self->indexer->add_doc( $row );
145 $self->indexer->commit;
146 warn "# commit index ", dump($self->{stats});
149 sub _field_lower_upper_term {
150 my ( $self, $field, $l, $u ) = @_;
151 my $numeric_padding = grep { /^$field$/ } @{ $self->{numeric_padding} };
154 $range->{lower_term} = $numeric_padding ? _numeric_padding $l : $l;
155 $range->{include_lower} = 1;
158 $range->{upper_term} = $numeric_padding ? _numeric_padding $u : $u;
159 $range->{include_upper} = 1;
162 $range->{field} = $field;
164 warn "# $field $l - $u numeric_padding:$numeric_padding ",dump($range);
170 my ( $self, $offset, $on_page, $sort, $q, $shareid, $backup_from, $backup_to, $files_from, $files_to ) = @_;
172 warn "# search $offset/$on_page [$q] shareid: $shareid backup: $backup_from - $backup_to files: $files_from - $files_to";
174 my $sort_field = (split(/_/,$sort,2))[0];
176 my $rules = [ KinoSearch::Search::SortRule->new( type => 'score' ) ];
177 $rules->[0] = KinoSearch::Search::SortRule->new( field => $sort_field, reverse => $sort =~ m/_a$/ ? 0 : 1 ) if $sort_field;
179 my $sort_spec = KinoSearch::Search::SortSpec->new( rules => $rules );
182 $split =~ s/(.)/$1 /g; # _file_path_split
183 my $split_query = KinoSearch::Search::TermQuery->new( field => '_file_path_split', term => $split );
184 #warn "XXX ",dump($split_query);
187 my $query_parser = KinoSearch::Search::QueryParser->new(
188 schema => $self->searcher->get_schema,
189 fields => ['_file_path_split'],
191 my $query = $query_parser->parse( '"' . $split . '"' );
196 push @and_query, KinoSearch::Search::TermQuery->new( field => 'shareid', term => $shareid );
199 if ( my $range = $self->_field_lower_upper_term( 'backup_date', $backup_from, $backup_to ) ) {
200 push @and_query, KinoSearch::Search::RangeQuery->new( %$range );
202 if ( my $range = $self->_field_lower_upper_term( 'date', $files_from, $files_to ) ) {
203 push @and_query, KinoSearch::Search::RangeQuery->new( %$range );
207 push @and_query, $query;
208 $query = KinoSearch::Search::ANDQuery->new( children => [ @and_query ] );
211 my $hits = $self->searcher->hits(
212 query => m/:/ ? $q : $query,
214 num_wanted => $on_page,
215 sort_spec => $sort_spec,
219 warn "# ", $hits->total_hits, " hits for $q\n";
221 return (0,[]) if $hits->total_hits == 0;
224 while ( my $hit = $hits->next ) {
225 warn "## hit = ",dump($hit) if $ENV{DEBUG};
226 push @$results, $hit;
229 return ( $hits->total_hits, $results );