use warnings;
use strict;
-use base qw/WebPAC::Common/;
-
-use KinoSearch::InvIndexer;
-use KinoSearch::Analysis::PolyAnalyzer;
+use base qw/WebPAC::Common WebPAC::Output Class::Accessor/;
+# Generate accessor methods: path, database, encoding and clean mirror the
+# constructor options shown in the SYNOPSIS; index holds the
+# KinoSearch::Simple object that init() stores.
+__PACKAGE__->mk_accessors(qw(
+ path
+ database
+ encoding
+ clean
+
+ index
+));
+
+use KinoSearch::Simple;
+use File::Path;
use Encode qw/from_to/;
use Data::Dump qw/dump/;
use Storable;
=head1 VERSION
-Version 0.03
+Version 0.04
=cut
-our $VERSION = '0.03';
+our $VERSION = '0.04';
=head1 SYNOPSIS
Open KinoSearch index
- my $est = new WebPAC::Output::KinoSearch(
- index_path => '/path/to/invindex',
- fields => qw/name of all filelds used/,
+ my $est = new WebPAC::Output::KinoSearch({
+ path => '/path/to/invindex',
database => 'demo',
- label => 'node label',
encoding => 'iso-8859-2',
clean => 1,
- );
+ });
Options are:
=over 4
-=item index_path
+=item path
path to KinoSearch index to use
-=item fields
-
-name of all fields used in this index
-
=item database
name of database from which data comes
-=item label
-
-label for node (optional)
-
=item encoding
character encoding of C<data_structure> if it's different from C<ISO-8859-2>
=cut
-sub new {
- my $class = shift;
- my $self = {@_};
- bless($self, $class);
+# init - validate options and open the index.
+#
+# Requires the path and database accessors to be set (dies through the
+# logger otherwise), defaults encoding to ISO-8859-2, creates the base
+# directory when it does not exist and opens a KinoSearch::Simple index at
+# "<path>/<database>", storing it in the index accessor.
+#
+# There is no explicit return; the value of the last statement, the
+# index() accessor call, is what callers receive.
+sub init {
+ my $self = shift;
my $log = $self->_get_logger;
#$log->debug("self: ", sub { dump($self) });
- foreach my $p (qw/index_path fields database/) {
- $log->logdie("need $p") unless ($self->{$p});
+ foreach my $p (qw/path database/) {
+ $log->logdie("need $p") unless ($self->$p);
}
- $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
-
- $self->{encoding} ||= 'ISO-8859-2';
+# $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
- $self->{index_path} .= '/' . $self->{database};
+ $self->encoding( 'ISO-8859-2' ) unless $self->encoding;
- $self->{clean} = 1 if (! -e $self->{index_path} . '/segments');
+ if ( ! -e $self->path ) {
+ # "mkpath $self->path || logdie(...)" parsed the || as part of mkpath's
+ # argument, so the error branch could never run. mkpath croaks on
+ # failure, hence the eval; the -d test decides whether creation worked.
+ eval { mkpath( $self->path ) };
+ $log->logdie("can't create ", $self->path,": ", $@ || $!) unless -d $self->path;
+ $log->info("created ", $self->path);
+ }
- $log->info("using", $self->{clean} ? ' new' : '', " index $self->{index_path} with encoding $self->{encoding}");
+ # each database gets its own index directory below the base path
+ my $path = $self->path . '/' . $self->database;
- my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
+ $log->info("using index $path with encoding ", $self->encoding);
- $self->{invindex} = KinoSearch::InvIndexer->new(
- invindex => $self->{index_path},
- create => $self->{clean},
- analyzer => $analyzer,
+ my $index = KinoSearch::Simple->new(
+ path => $path,
+ language => 'en',
);
- my $fields_path = $self->{index_path} . '/fields.storable';
- $fields_path =~ s#//#/#g;
- if (-e $fields_path) {
- $self->{fields} = retrieve($fields_path) ||
- $log->warn("can't open $fields_path: $!");
- } else {
- $log->error("This will be dummy run since no fields statistics are found!");
- $log->error("You will have to re-run indexing to get search results!");
- $self->{dummy_run} = 1;
- }
- $self->{fields_path} = $fields_path;
-
- foreach my $f (@{ $self->{fields} }) {
- $self->{invindex}->spec_field(
- name => $f,
-# boost => 10,
- stored => 1,
- indexed => 1,
- vectorized => 0,
- );
- }
+ $log->logdie("can't open $path: $!") unless $index;
+
+ $self->index( $index );
- $self ? return $self : return undef;
}
=head2 add
-Adds one entry to database.
-
- $est->add(
- id => 42,
- ds => $ds,
- type => 'display',
- text => 'optional text from which snippet is created',
- );
-
-This function will create entries in index using following URI format:
+Adds one document to the index.
- C<file:///type/database%20name/000>
-
-Each tag in C<data_structure> with specified C<type> will create one
-attribute and corresponding hidden text (used for search).
+ $est->add( 42, $ds );
=cut
sub add {
my $self = shift;
- my $args = {@_};
+ # Takes a record id and its data structure, converts the structure with
+ # ds_to_hash( $ds, 'search' ) and adds the result to the index.
+ # Dies through the logger when id is undefined or ds is false; returns
+ # early (empty) when ds_to_hash yields nothing, otherwise returns 1.
+ my ( $id, $ds ) = @_;
my $log = $self->_get_logger;
+ $log->logdie("need id") unless defined $id;
+ $log->logdie("need ds") unless $ds;
- my $database = $self->{'database'} || $log->logconfess('no database in $self');
- $log->logconfess('need invindex in object') unless ($self->{'invindex'});
+ # the code ref defers the dump until the logger actually emits the message
+ $log->debug("id: $id ds = ", sub { dump($ds) });
- foreach my $p (qw/id ds type/) {
- $log->logdie("need $p") unless ($args->{$p});
- }
+ my $hash = $self->ds_to_hash( $ds, 'search' ) || return;
- my $type = $args->{'type'};
- my $id = $args->{'id'};
+ # goes through the logger instead of an unconditional warn to STDERR
+ $log->debug("add( $id ) => ", sub { dump( $hash ) });
- my $uri = "file:///$type/$database/$id";
- $log->debug("creating $uri");
-
- my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");
-
- sub _add_value($$$$$) {
- my ($self,$log,$doc,$n,$v) = @_;
- return unless ($v);
-
- $self->{value_usage}->{$n}++;
- return if ($self->{dummy_run});
-
- eval { $doc->set_value($n, $self->convert($v) ) };
- $log->warn("can't insert: $n = $v") if ($@);
- }
-
- _add_value($self,$log,$doc, 'uri', $uri);
-
- $log->debug("ds = ", sub { dump($args->{'ds'}) } );
-
- # filter all tags which have type defined
- my @tags = grep {
- ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
- } keys %{ $args->{'ds'} };
-
- $log->debug("tags = ", join(",", @tags));
-
- return unless (@tags);
-
- foreach my $tag (@tags) {
-
- my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
-
- next if (! $vals);
-
- $vals = $self->convert( $vals ) or
- $log->logdie("can't convert '$vals' to UTF-8");
-
- _add_value($self, $log, $doc, $tag, $vals );
- }
-
- if (my $text = $args->{'text'}) {
- _add_value($self, $log, $doc, 'bodytext', $text );
- }
-
- #$log->debug("adding ", sub { $doc->dump_draft } );
- $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri");
+ $self->index->add_doc( $hash );
return 1;
}
my $log = $self->_get_logger();
- $log->info("finish index writing to disk");
- $self->{invindex}->finish;
-
- $log->info("writing value usage file");
-
- # add fields from last run
- map { $self->{value_usage}->{$_}++ } @{ $self->{fields} };
-
- my @fields = keys %{ $self->{value_usage} };
- store \@fields, $self->{fields_path} ||
- $log->warn("can't write $self->{fields_path}: $!");
+ $log->info("dummy finish");
}
=head1 COPYRIGHT & LICENSE
-Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
+Copyright 2005-2007 Dobrica Pavlinusic, All Rights Reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
--- /dev/null
+#!/usr/bin/perl -w
+
+# Test for WebPAC::Output::KinoSearch: builds an index under ./kino/
+# next to this script, adds three records and runs one search.
+# The plan matches the 13 assertions below (diag output is not counted).
+use Test::More tests => 13;
+use Test::Exception;
+use Cwd qw/abs_path/;
+use KinoSearch;
+use File::Slurp;
+use Data::Dump qw/dump/;
+use blib;
+use strict;
+
+BEGIN {
+use_ok( 'WebPAC::Output::KinoSearch' );
+}
+
+my $debug = shift @ARGV;
+
+# directory containing this script, with a trailing slash
+ok(my $abs_path = abs_path($0), "abs_path");
+$abs_path =~ s#/[^/]*$#/#; #
+diag "abs_path: $abs_path";
+my $path = "$abs_path/kino/";
+
+ok(my $out = WebPAC::Output::KinoSearch->new({
+ path => $path,
+ database => 'test',
+}), "new");
+
+ok( $out->init, 'init' );
+
+# sample record: 'search' values given both as array refs and a plain scalar
+my $ds = {
+ 'Source' => {
+ 'name' => 'Izvor: ',
+ 'search' => [ 'foo' ]
+ },
+ 'ID' => {
+ 'search' => 'id',
+ },
+ 'Array' => {
+ 'search' => [ qw/a1 a2 s3 a4 a5/ ],
+ },
+};
+
+throws_ok { $out->add( ) } qr/need id/, 'add without params';
+throws_ok { $out->add( 42 ) } qr/need ds/, 'add without ds';
+
+ok( $out->add( 42, $ds ), 'add 42' );
+
+ok( $out->add( 99, { foo => { search => 'bar' } } ), 'add 99' );
+
+ok( $out->add( 100, { foo => { search => [ qw/foo bar baz/ ] } } ), 'add 100' );
+
+ok( -e $out->path, "created $path" );
+
+ok( my $index = $out->index, 'have index' );
+
+diag $out->path," eq ",$path;
+cmp_ok( $out->path, 'eq', $path, 'path' );
+
+my $query_string = 'foo';
+
+# search results are only printed as diagnostics, not asserted
+my $total_hits = $index->search(
+ query => $query_string,
+ offset => 0,
+ num_wanted => 10,
+);
+
+diag "Total hits: $total_hits\n";
+while ( my $hit = $index->fetch_hit_hashref ) {
+ diag dump($hit);
+}
+
+ok( $out->finish, 'finish' );
+