X-Git-Url: http://git.rot13.org/?p=BackupPC.git;a=blobdiff_plain;f=lib%2FBackupPC%2FSearch%2FKinoSearch.pm;h=d12577d3d6cf9b2e22489f0a5543f2c85cd9b3af;hp=3efcf8c67f348f7d9239de87b0d4816da0883e42;hb=316ee1fea2346566054cdab9cfecc0a8b2f662dd;hpb=dbc58948795a708b388ad17d6c51eee454a8b582

diff --git a/lib/BackupPC/Search/KinoSearch.pm b/lib/BackupPC/Search/KinoSearch.pm
index 3efcf8c..d12577d 100644
--- a/lib/BackupPC/Search/KinoSearch.pm
+++ b/lib/BackupPC/Search/KinoSearch.pm
@@ -17,7 +17,10 @@ sub new {
 
 	my $index_path = $Conf{KinoPath} || die "no KinoPath";
 
-	my $self = bless { index => $index_path }, $class;
+	my $self = bless {
+		index => $index_path,
+		first_time_indexing => ! -d $index_path,
+	}, $class;
 	warn "# ",dump($self);
 	return $self;
 }
@@ -40,22 +43,39 @@ sub indexer {
 	);
 	my $blob_type = KinoSearch::Plan::BlobType->new( stored => 1 );
 	my $string_type = KinoSearch::Plan::StringType->new; # non-tokenized
-	my $num_type = KinoSearch::Plan::Int64Type->new;
+	my $num_type = KinoSearch::Plan::Int64Type->new( sortable => 1 );
+	my $sort_type = KinoSearch::Plan::StringType->new( sortable => 1 ); # non-tokenized
 
+	# numeric
 	$schema->spec_field( name => $_, type => $string_type ) foreach ( qw/
 		backup_date
 		backupnum
-		date
 		fid
 		shareid
-		size
 		type
 	/ );
 
+	# non-tokenized strings
 	$schema->spec_field( name => $_, type => $string_type ) foreach ( qw/
-		_uri _file_path_split filepath hname sname
+		_uri
+		hname
+	/);
+
+	# sortable
+	$schema->spec_field( name => $_, type => $sort_type ) foreach (qw/
+		sname
+		filepath
+	/);
+
+	# sortable numeric
+	$schema->spec_field( name => $_, type => $sort_type ) foreach (qw/
+		date
+		size
 	/);
 
+	# tokenized magic columns for infix search
+	$schema->spec_field( name => '_file_path_split', type => $ft_type );
+
 #	$schema->spec_field( name => '_doc', type => $blob_type );
 
 	my $indexer = KinoSearch::Index::Indexer->new(
@@ -64,8 +84,6 @@ sub indexer {
 		create => 1,
 	);
 
-	$indexer->commit; # make sure that index exists
-
 	warn "# created indexer";
 
 	return $self->{_indexer} = $indexer;
@@ -83,8 +101,14 @@ sub searcher {
 
 sub exists {
 	my ($self,$row) = @_;
+	return 0 if $self->{first_time_indexing};
+
 	my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
 	my $hits = $self->searcher->hits( query => "_uri:$uri" );
+
+	# count under {stats} (not {stat}) so commit() includes it in its dump
+	$self->{stats}->{exists}->{ $hits->total_hits }++;
+
 	return $hits->total_hits;
 }
 
@@ -98,6 +122,8 @@ sub add_doc {
 
 	warn "XXX ",dump($row) if $ENV{DEBUG};
 
+	$self->{stats}->{add_doc}++;
+
 	$self->indexer->add_doc( $row );
 }
 
@@ -105,7 +131,7 @@ sub add_doc {
 sub commit {
 	my $self = shift;
 	$self->indexer->commit;
-	warn "# commit index";
+	warn "# commit index ", dump($self->{stats});
 }
 
 sub search {
@@ -120,9 +146,13 @@ sub search {
 
 	my $sort_spec = KinoSearch::Search::SortSpec->new( rules => $rules );
 
-#	$q =~ s/(.)/$1 /g;
+	my $split = $q;
+	$split =~ s/(.)/$1 /g;	# _file_path_split
+	$split = qq{"$split"};	# exact ordering
 	my $hits = $self->searcher->hits(
-		query => $q,
+		query => $split,
+		offset => $offset,
+		num_wanted => $on_page,
 		sort_spec => $sort_spec,
 	);
 