# my $tokenizer = KinoSearch::Analysis::Tokenizer->new( pattern => '\\w' );
sub new {
- my ( $class ) = @_;
+ my $class = shift @_;
+ my %Conf = @_;
my $schema = KinoSearch::Plan::Schema->new;
+
+
+ my $case_folder = KinoSearch::Analysis::CaseFolder->new;
+ my $tokenizer = KinoSearch::Analysis::Tokenizer->new;
my $polyanalyzer = KinoSearch::Analysis::PolyAnalyzer->new(
- language => 'en',
+ analyzers => [ $case_folder, $tokenizer ],
);
+
my $ft_type = KinoSearch::Plan::FullTextType->new(
analyzer => $polyanalyzer,
);
/ );
$schema->spec_field( name => $_, type => $string_type ) foreach ( qw/
- _uri filepath hname sname
+ _uri _file_path_split filepath hname sname
/);
# $schema->spec_field( name => '_doc', type => $blob_type );
- my $index_path = '/tmp/kinosearch'; # FIXME
+ my $index_path = $Conf{KinoPath} || die "no KinoPath";
my $indexer = KinoSearch::Index::Indexer->new(
schema => $schema,
# Look up a backup-file row in the search index by its URI.
#
# $row is a hash reference; the keys read here are hname, sname, backupnum
# and filepath. They are joined as "hname:sname#backupnum filepath", the
# same layout that add_doc assigns to $row->{_uri}.
#
# Returns whatever total_hits reports for the "_uri:<uri>" query
# (presumably the number of matching documents, so callers can treat it
# as a boolean - confirm against the KinoSearch Hits API).
#
# NOTE(review): lines starting with '-' look like removal markers from a
# patch (the hard-coded "return 0" stub and a debug warn) - confirm before
# treating this text as runnable Perl.
sub exists {
my ($self,$row) = @_;
- return 0; # FIXME
-
# Must stay in sync with the _uri value built in add_doc.
my $uri = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
- warn "# exists $uri";
# NOTE(review): $uri is interpolated unescaped and contains ':', '#' and a
# space; presumably the string goes through a query parser - verify that
# this matches the stored _uri exactly.
my $hits = $self->{searcher}->hits( query => "_uri:$uri" );
return $hits->total_hits;
}
sub add_doc {
my ($self,$row) = @_;
- warn "XXX ",dump($row);
-
$row->{_uri} = $row->{hname} . ':' . $row->{sname} . '#' . $row->{backupnum} . ' ' . $row->{filepath};
+ my $path = $row->{filepath};
+ $path =~ s/(.)/$1 /g; # XXX our tokenize
+ $row->{_file_path_split} = $path;
+
+ warn "XXX ",dump($row) if $ENV{DEBUG};
$self->{indexer}->add_doc( $row );