use warnings;
use strict;
-use base qw/WebPAC::Common/;
-
-use KinoSearch::InvIndexer;
-use KinoSearch::Analysis::PolyAnalyzer;
+use base qw/WebPAC::Common WebPAC::Output Class::Accessor/;
+__PACKAGE__->mk_accessors(qw(
+ path
+ database
+ encoding
+ clean
+
+ index
+));
+
+use KinoSearch::Simple;
+use File::Path;
use Encode qw/from_to/;
-use Data::Dumper;
+use Data::Dump qw/dump/;
use Storable;
=head1 NAME
=head1 VERSION
-Version 0.03
+Version 0.05
=cut
-our $VERSION = '0.03';
+our $VERSION = '0.05';
=head1 SYNOPSIS
Open KinoSearch index
- my $est = new WebPAC::Output::KinoSearch(
- index_path => '/path/to/invindex',
- fields => qw/name of all filelds used/,
+ my $out = new WebPAC::Output::KinoSearch({
+ path => '/path/to/invindex',
database => 'demo',
- label => 'node label',
encoding => 'iso-8859-2',
clean => 1,
- );
+ });
Options are:
=over 4
-=item index_path
+=item path
path to KinoSearch index to use
-=item fields
-
-name of all fields used in this index
-
=item database
name of database from which data comes
-=item label
-
-label for node (optional)
-
=item encoding
character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
=back
+=head2 init
+
+ $out->init;
+
=cut
-sub new {
- my $class = shift;
- my $self = {@_};
- bless($self, $class);
+sub init {
+ my $self = shift;
my $log = $self->_get_logger;
- #$log->debug("self: ", sub { Dumper($self) });
+ #$log->debug("self: ", sub { dump($self) });
- foreach my $p (qw/index_path fields database/) {
- $log->logdie("need $p") unless ($self->{$p});
+ foreach my $p (qw/path database/) {
+ $log->logdie("need $p") unless ($self->$p);
}
- $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
-
- $self->{encoding} ||= 'ISO-8859-2';
+# $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
- $self->{index_path} .= '/' . $self->{database};
+ $self->encoding( 'ISO-8859-2' ) unless $self->encoding;
- $self->{clean} = 1 if (! -e $self->{index_path} . '/segments');
+ if ( ! -e $self->path ) {
+ mkpath $self->path || $log->logdie("can't create ", $self->path,": $!");
+ $log->info("created ", $self->path);
+ } elsif ( $self->clean ) {
+ $log->info("removing existing ", $self->path);
+ rmtree $self->path || $log->logdie("can't remove ", $self->path,": $!");
+ mkpath $self->path || $log->logdie("can't create ", $self->path,": $!");
+ }
- $log->info("using", $self->{clean} ? ' new' : '', " index $self->{index_path} with encoding $self->{encoding}");
+ my $path = $self->path . '/' . $self->database;
- my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
+ $log->info("using index $path with encoding ", $self->encoding);
- $self->{invindex} = KinoSearch::InvIndexer->new(
- invindex => $self->{index_path},
- create => $self->{clean},
- analyzer => $analyzer,
+ my $index = KinoSearch::Simple->new(
+ path => $path,
+ language => 'en',
);
- my $fields_path = $self->{index_path} . '/fields.storable';
- $fields_path =~ s#//#/#g;
- if (-e $fields_path) {
- $self->{fields} = retrieve($fields_path) ||
- $log->warn("can't open $fields_path: $!");
- } else {
- $log->error("This will be dummy run since no fields statistics are found!");
- $log->error("You will have to re-run indexing to get search results!");
- $self->{dummy_run} = 1;
- }
- $self->{fields_path} = $fields_path;
-
- foreach my $f (@{ $self->{fields} }) {
- $self->{invindex}->spec_field(
- name => $f,
-# boost => 10,
- stored => 1,
- indexed => 1,
- vectorized => 0,
- );
- }
+ $log->logdie("can't open $path: $!") unless $index;
+
+ $self->index( $index );
- $self ? return $self : return undef;
}
=head2 add
-Adds one entry to database.
-
- $est->add(
- id => 42,
- ds => $ds,
- type => 'display',
- text => 'optional text from which snippet is created',
- );
-
-This function will create entries in index using following URI format:
+Adds one entry
- C<file:///type/database%20name/000>
-
-Each tag in C<data_structure> with specified C<type> will create one
-attribute and corresponding hidden text (used for search).
+ $out->add( 42, $ds );
=cut
sub add {
my $self = shift;
- my $args = {@_};
+ my ( $id, $ds ) = @_;
my $log = $self->_get_logger;
+ $log->logdie("need id") unless defined $id;
+ $log->logdie("need ds") unless $ds;
- my $database = $self->{'database'} || $log->logconfess('no database in $self');
- $log->logconfess('need invindex in object') unless ($self->{'invindex'});
-
- foreach my $p (qw/id ds type/) {
- $log->logdie("need $p") unless ($args->{$p});
- }
-
- my $type = $args->{'type'};
- my $id = $args->{'id'};
-
- my $uri = "file:///$type/$database/$id";
- $log->debug("creating $uri");
-
- my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");
-
- sub _add_value($$$$$) {
- my ($self,$log,$doc,$n,$v) = @_;
- return unless ($v);
-
- $self->{value_usage}->{$n}++;
- return if ($self->{dummy_run});
+ $log->debug("id: $id ds = ",dump($ds));
- eval { $doc->set_value($n, $self->convert($v) ) };
- $log->warn("can't insert: $n = $v") if ($@);
- }
-
- _add_value($self,$log,$doc, 'uri', $uri);
-
- $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
-
- # filter all tags which have type defined
- my @tags = grep {
- ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
- } keys %{ $args->{'ds'} };
-
- $log->debug("tags = ", join(",", @tags));
-
- return unless (@tags);
+ my $hash = $self->ds_to_hash( $ds, 'search' ) || return;
- foreach my $tag (@tags) {
+ $hash->{database} ||= $self->database;
+ $hash->{id} ||= $id;
- my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
-
- next if (! $vals);
-
- $vals = $self->convert( $vals ) or
- $log->logdie("can't convert '$vals' to UTF-8");
-
- _add_value($self, $log, $doc, $tag, $vals );
- }
-
- if (my $text = $args->{'text'}) {
- _add_value($self, $log, $doc, 'bodytext', $text );
- }
+ $log->debug("add( $id, ", sub { dump($ds) }," ) => ", sub { dump( $hash ) });
- #$log->debug("adding ", sub { $doc->dump_draft } );
- $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri");
+ $self->index->add_doc( $hash );
return 1;
}
Close index
- $index->finish;
+ $out->finish;
=cut
my $log = $self->_get_logger();
- $log->info("finish index writing to disk");
- $self->{invindex}->finish;
-
- $log->info("writing value usage file");
-
- # add fields from last run
- map { $self->{value_usage}->{$_}++ } @{ $self->{fields} };
-
- my @fields = keys %{ $self->{value_usage} };
- store \@fields, $self->{fields_path} ||
- $log->warn("can't write $self->{fields_path}: $!");
-
-}
-
-=head2 convert
-
- my $utf8_string = $self->convert('string in codepage');
-
-=cut
-
-sub convert {
- my $self = shift;
+ $log->info("dummy finish");
- my $text = shift || return;
- from_to($text, $self->{encoding}, 'UTF-8');
- return $text;
}
=head1 AUTHOR
=head1 COPYRIGHT & LICENSE
-Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
+Copyright 2005-2007 Dobrica Pavlinusic, All Rights Reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.