use warnings;
use strict;
-use base qw/WebPAC::Common/;
-
-use KinoSearch::InvIndexer;
-use KinoSearch::Analysis::PolyAnalyzer;
+use base qw/WebPAC::Common WebPAC::Output Class::Accessor/;
+__PACKAGE__->mk_accessors(qw(
+ path
+ database
+ encoding
+ clean
+
+ index
+));
+
+use KinoSearch::Simple;
+use File::Path;
use Encode qw/from_to/;
-use Data::Dumper;
+use Data::Dump qw/dump/;
+use Storable;
=head1 NAME
=head1 VERSION
-Version 0.01
+Version 0.05
=cut
-our $VERSION = '0.01';
+our $VERSION = '0.05';
=head1 SYNOPSIS
Open KinoSearch index
- my $est = new WebPAC::Output::KinoSearch(
- index_path => '/path/to/invindex',
- fields => qw/name of all filelds used/,
+ my $out = new WebPAC::Output::KinoSearch({
+ path => '/path/to/invindex',
database => 'demo',
- label => 'node label',
encoding => 'iso-8859-2',
clean => 1,
- );
+ });
Options are:
=over 4
-=item index_path
+=item path
path to KinoSearch index to use
-=item fields
-
-name of all fields used in this index
-
=item database
name of database from which data comes
-=item label
-
-label for node (optional)
-
=item encoding
character encoding of C<data_structure> if it's different than C<ISO-8859-2>
=back
+=head2 init
+
+ $out->init;
+
=cut
-sub new {
- my $class = shift;
- my $self = {@_};
- bless($self, $class);
+# Prepare the index directory and open the KinoSearch index.
+# Requires the path and database accessors to be set; encoding defaults
+# to ISO-8859-2. Dies through the logger on missing options or on
+# directory/index failures.
+sub init {
+ my $self = shift;
my $log = $self->_get_logger;
- #$log->debug("self: ", sub { Dumper($self) });
+ #$log->debug("self: ", sub { dump($self) });
- foreach my $p (qw/index_path fields database/) {
- $log->logdie("need $p") unless ($self->{$p});
+ foreach my $p (qw/path database/) {
+ $log->logdie("need $p") unless ($self->$p);
}
- $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
+# $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
- $self->{encoding} ||= 'ISO-8859-2';
+ $self->encoding( 'ISO-8859-2' ) unless $self->encoding;
+
+ # NOTE: the calls below are parenthesised and use low-precedence "or";
+ # with "||" the logdie expression would bind to the path argument
+ # instead of to the result of mkpath/rmtree and could never fire.
+ if ( ! -e $self->path ) {
+ mkpath( $self->path ) or $log->logdie("can't create ", $self->path,": $!");
+ $log->info("created ", $self->path);
+ } elsif ( $self->clean ) {
+ $log->info("removing existing ", $self->path);
+ rmtree( $self->path ) or $log->logdie("can't remove ", $self->path,": $!");
+ mkpath( $self->path ) or $log->logdie("can't create ", $self->path,": $!");
+ }
- $log->info("using index $self->{index_path} with encoding $self->{encoding}");
+ # each database gets its own index directory below path
+ my $path = $self->path . '/' . $self->database;
- my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
+ $log->info("using index $path with encoding ", $self->encoding);
- $self->{invindex} = KinoSearch::InvIndexer->new(
- invindex => $self->{index_path},
- create => $self->{clean},
- analyzer => $analyzer,
+ my $index = KinoSearch::Simple->new(
+ path => $path,
+ language => 'en',
);
- foreach my $f (@{ $self->{fields} }) {
- $self->{invindex}->spec_field(
- name => $f,
-# boost => 10,
- stored => 1,
- indexed => 1,
- vectorized => 0,
- );
- }
+ $log->logdie("can't open $path: $!") unless $index;
+
+ $self->index( $index );
- $self ? return $self : return undef;
}
=head2 add
-Adds one entry to database.
-
- $est->add(
- id => 42,
- ds => $ds,
- type => 'display',
- text => 'optional text from which snippet is created',
- );
-
-This function will create entries in index using following URI format:
-
- C<file:///type/database%20name/000>
+Adds one entry
-Each tag in C<data_structure> with specified C<type> will create one
-attribute and corresponding hidden text (used for search).
+ $out->add( 42, $ds );
=cut
sub add {
my $self = shift;
- my $args = {@_};
+ # $id - record identifier (must be defined, 0 is allowed)
+ # $ds - data structure passed to ds_to_hash
+ my ( $id, $ds ) = @_;
my $log = $self->_get_logger;
+ $log->logdie("need id") unless defined $id;
+ $log->logdie("need ds") unless $ds;
- my $database = $self->{'database'} || $log->logconfess('no database in $self');
- $log->logconfess('need invindex in object') unless ($self->{'invindex'});
-
- foreach my $p (qw/id ds type/) {
- $log->logdie("need $p") unless ($args->{$p});
- }
-
- my $type = $args->{'type'};
- my $id = $args->{'id'};
-
- my $uri = "file:///$type/$database/$id";
- $log->debug("creating $uri");
+ # dump lazily via a code ref, like the debug call further down, so the
+ # structure is only serialized when the debug message is really logged
+ $log->debug("id: $id ds = ", sub { dump($ds) });
- my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");
+ # nothing to index when ds_to_hash returns a false value
+ my $hash = $self->ds_to_hash( $ds, 'search' ) || return;
- sub add_value($$$) {
- my ($doc,$n,$v) = @_;
- eval { $doc->set_value($n, $self->convert($v) ) };
- $log->warn("can't insert: $n = $v") if ($@);
- }
-
- add_value($doc, 'uri', $uri);
-
- $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
-
- # filter all tags which have type defined
- my @tags = grep {
- ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
- } keys %{ $args->{'ds'} };
-
- $log->debug("tags = ", join(",", @tags));
+ # fill in database and id only when the hash has no true value for them
+ $hash->{database} ||= $self->database;
+ $hash->{id} ||= $id;
- return unless (@tags);
+ $log->debug("add( $id, ", sub { dump($ds) }," ) => ", sub { dump( $hash ) });
- foreach my $tag (@tags) {
-
- my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
-
- next if (! $vals);
-
- $vals = $self->convert( $vals ) or
- $log->logdie("can't convert '$vals' to UTF-8");
-
- add_value($doc, $tag, $vals );
- }
-
- if (my $text = $args->{'text'}) {
- add_value($doc, 'bodytext', $text );
- }
-
- #$log->debug("adding ", sub { $doc->dump_draft } );
- $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri");
+ $self->index->add_doc( $hash );
return 1;
}
Close index
- $index->finish;
+ $out->finish;
=cut
sub finish {
my $self = shift;
- $self->_get_logger()->info("finish index writing to disk");
- $self->{invindex}->finish;
-}
-
-=head2 convert
+ # placeholder: only writes a log message, the index object is not touched
+ $self->_get_logger()->info("dummy finish");
- my $utf8_string = $self->convert('string in codepage');
-
-=cut
-
-sub convert {
- my $self = shift;
- my $text = shift || return;
- from_to($text, $self->{encoding}, 'UTF-8');
- return $text;
}
=head1 AUTHOR
=head1 COPYRIGHT & LICENSE
-Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
+Copyright 2005-2007 Dobrica Pavlinusic, All Rights Reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.