X-Git-Url: http://git.rot13.org/?p=BackupPC.git;a=blobdiff_plain;f=lib%2FBackupPC%2FSearch%2FKinoSearch.pm;h=d0e77130d3b069af1afde3f2a4a0bc30a90460f1;hp=e6f7c6a3b3e42a91d18b8118c01cae5cf4376717;hb=HEAD;hpb=ae81c69bf053eed787bac83c224225446ac07449 diff --git a/lib/BackupPC/Search/KinoSearch.pm b/lib/BackupPC/Search/KinoSearch.pm index e6f7c6a..d0e7713 100644 --- a/lib/BackupPC/Search/KinoSearch.pm +++ b/lib/BackupPC/Search/KinoSearch.pm @@ -11,6 +11,8 @@ use Data::Dump qw(dump); # my $tokenizer = KinoSearch::Analysis::Tokenizer->new( pattern => '\\w' ); +# numeric_padding values are used in range search, and must be sortable + sub new { my $class = shift @_; my %Conf = @_; @@ -20,8 +22,13 @@ sub new { my $self = bless { index => $index_path, first_time_indexing => ! -d $index_path, + numeric_padding => [ qw( + backup_date + date + ) ], + }, $class; - warn "# ",dump($self); + #warn "# ",dump($self); return $self; } @@ -46,21 +53,8 @@ sub indexer { my $num_type = KinoSearch::Plan::Int64Type->new( sortable => 1 ); my $sort_type = KinoSearch::Plan::StringType->new( sortable => 1 ); # non-tokenized -=for numeric-no-padding - fid - shareid - type - backupnum -=cut - $self->{numeric_padding} = [ qw/ - backup_date - date - size - / ]; - # numeric $schema->spec_field( name => $_, type => $string_type ) foreach ( qw/ - backup_date fid shareid type @@ -81,6 +75,7 @@ sub indexer { # sortable numeric $schema->spec_field( name => $_, type => $sort_type ) foreach (qw/ backupnum + backup_date date size /); @@ -96,7 +91,7 @@ sub indexer { create => 1, ); - warn "# created indexer"; + #warn "# created indexer"; return $self->{_indexer} = $indexer; @@ -124,6 +119,8 @@ sub exists { return $hits->total_hits; } +sub _numeric_padding { sprintf "%010d", $_[0] } # pad up to 32bit number (timestamp) + sub add_doc { my ($self,$row) = @_; @@ -135,7 +132,7 @@ sub add_doc { $self->{stats}->{add_doc}++; foreach my $col ( @{ $self->{numeric_padding} } ) { - $row->{$col} = sprintf "%011d", 
$row->{$col}; + $row->{$col} = _numeric_padding $row->{$col}; } warn "XXX ",dump($row) if $ENV{DEBUG}; @@ -147,22 +144,26 @@ sub add_doc { sub commit { my $self = shift; $self->indexer->commit; - warn "# commit index ", dump($self->{stats}); + print STDERR "[commit]"; } sub _field_lower_upper_term { - my ( $field, $l, $u ) = @_; + my ( $self, $field, $l, $u ) = @_; + my $numeric_padding = grep { /^$field$/ } @{ $self->{numeric_padding} }; my $range; if ( $l ) { - $range->{lower_term} = $l; + $range->{lower_term} = $numeric_padding ? _numeric_padding $l : $l; $range->{include_lower} = 1; } if ( $u ) { - $range->{upper_term} = $u; + $range->{upper_term} = $numeric_padding ? _numeric_padding $u : $u; $range->{include_upper} = 1; } - $range->{field} = $field if $range; - warn "# $field $l - $u ",dump($range); + if ( $range ) { + $range->{field} = $field; + + #warn "# $field $l - $u numeric_padding:$numeric_padding ",dump($range); + } return $range; } @@ -196,10 +197,10 @@ sub search { push @and_query, KinoSearch::Search::TermQuery->new( field => 'shareid', term => $shareid ); } - if ( my $range = _field_lower_upper_term( 'backup_date', $backup_from, $backup_to ) ) { + if ( my $range = $self->_field_lower_upper_term( 'backup_date', $backup_from, $backup_to ) ) { push @and_query, KinoSearch::Search::RangeQuery->new( %$range ); } - if ( my $range = _field_lower_upper_term( 'date', $files_from, $files_to ) ) { + if ( my $range = $self->_field_lower_upper_term( 'date', $files_from, $files_to ) ) { push @and_query, KinoSearch::Search::RangeQuery->new( %$range ); }