X-Git-Url: http://git.rot13.org/?p=BackupPC.git;a=blobdiff_plain;f=lib%2FBackupPC%2FSearch%2FKinoSearch.pm;h=d0e77130d3b069af1afde3f2a4a0bc30a90460f1;hp=d12577d3d6cf9b2e22489f0a5543f2c85cd9b3af;hb=c2148616f886530652ead505e5781c1cf94199b5;hpb=316ee1fea2346566054cdab9cfecc0a8b2f662dd diff --git a/lib/BackupPC/Search/KinoSearch.pm b/lib/BackupPC/Search/KinoSearch.pm index d12577d..d0e7713 100644 --- a/lib/BackupPC/Search/KinoSearch.pm +++ b/lib/BackupPC/Search/KinoSearch.pm @@ -11,6 +11,8 @@ use Data::Dump qw(dump); # my $tokenizer = KinoSearch::Analysis::Tokenizer->new( pattern => '\\w' ); +# numeric_padding values are used in range search, and must be sortable + sub new { my $class = shift @_; my %Conf = @_; @@ -20,8 +22,13 @@ sub new { my $self = bless { index => $index_path, first_time_indexing => ! -d $index_path, + numeric_padding => [ qw( + backup_date + date + ) ], + }, $class; - warn "# ",dump($self); + #warn "# ",dump($self); return $self; } @@ -48,8 +55,6 @@ sub indexer { # numeric $schema->spec_field( name => $_, type => $string_type ) foreach ( qw/ - backup_date - backupnum fid shareid type @@ -69,6 +74,8 @@ sub indexer { # sortable numeric $schema->spec_field( name => $_, type => $sort_type ) foreach (qw/ + backupnum + backup_date date size /); @@ -84,7 +91,7 @@ sub indexer { create => 1, ); - warn "# created indexer"; + #warn "# created indexer"; return $self->{_indexer} = $indexer; @@ -112,6 +119,8 @@ sub exists { return $hits->total_hits; } +sub _numeric_padding { sprintf "%010d", $_[0] } # pad up to 32bit number (timestamp) + sub add_doc { my ($self,$row) = @_; @@ -120,10 +129,14 @@ sub add_doc { $path =~ s/(.)/$1 /g; # XXX our tokenize $row->{_file_path_split} = $path; - warn "XXX ",dump($row) if $ENV{DEBUG}; - $self->{stats}->{add_doc}++; + foreach my $col ( @{ $self->{numeric_padding} } ) { + $row->{$col} = _numeric_padding $row->{$col}; + } + + warn "XXX ",dump($row) if $ENV{DEBUG}; + $self->indexer->add_doc( $row ); } @@ -131,7 +144,27 @@ sub 
add_doc { sub commit { my $self = shift; $self->indexer->commit; - warn "# commit index ", dump($self->{stats}); + print STDERR "[commit]"; +} + +sub _field_lower_upper_term { + my ( $self, $field, $l, $u ) = @_; + my $numeric_padding = grep { /^$field$/ } @{ $self->{numeric_padding} }; + my $range; + if ( $l ) { + $range->{lower_term} = $numeric_padding ? _numeric_padding $l : $l; + $range->{include_lower} = 1; + } + if ( $u ) { + $range->{upper_term} = $numeric_padding ? _numeric_padding $u : $u; + $range->{include_upper} = 1; + } + if ( $range ) { + $range->{field} = $field; + + #warn "# $field $l - $u numeric_padding:$numeric_padding ",dump($range); + } + return $range; } sub search { @@ -148,9 +181,36 @@ sub search { my $split = $q; $split =~ s/(.)/$1 /g; # _file_path_split - $split = qq{"$split"}; # exact ordering + my $split_query = KinoSearch::Search::TermQuery->new( field => '_file_path_split', term => $split ); +#warn "XXX ",dump($split_query); + + + my $query_parser = KinoSearch::Search::QueryParser->new( + schema => $self->searcher->get_schema, + fields => ['_file_path_split'], + ); + my $query = $query_parser->parse( '"' . $split . '"' ); + + my @and_query; + + if ( $shareid ) { + push @and_query, KinoSearch::Search::TermQuery->new( field => 'shareid', term => $shareid ); + } + + if ( my $range = $self->_field_lower_upper_term( 'backup_date', $backup_from, $backup_to ) ) { + push @and_query, KinoSearch::Search::RangeQuery->new( %$range ); + } + if ( my $range = $self->_field_lower_upper_term( 'date', $files_from, $files_to ) ) { + push @and_query, KinoSearch::Search::RangeQuery->new( %$range ); + } + + if ( @and_query ) { + push @and_query, $query; + $query = KinoSearch::Search::ANDQuery->new( children => [ @and_query ] ); + } + my $hits = $self->searcher->hits( - query => $split, + query => m/:/ ? 
$q : $query, offset => $offset, num_wanted => $on_page, sort_spec => $sort_spec, @@ -163,7 +223,7 @@ sub search { my $results; while ( my $hit = $hits->next ) { -warn "XXX ",dump($hit); + warn "## hit = ",dump($hit) if $ENV{DEBUG}; push @$results, $hit; }