--- /dev/null
+package WebPAC::Output::KinoSearch;
+
+use warnings;
+use strict;
+
+use base qw/WebPAC::Common/;
+
+use KinoSearch::InvIndexer;
+use KinoSearch::Analysis::PolyAnalyzer;
+use Encode qw/from_to/;
+use Data::Dumper;
+
+=head1 NAME
+
+WebPAC::Output::KinoSearch - Create KinoSearch full text index
+
+=head1 VERSION
+
+Version 0.01
+
+=cut
+
+our $VERSION = '0.01';
+
+=head1 SYNOPSIS
+
+Create full text index using KinoSearch index from data with
+type C<search>.
+
+=head1 FUNCTIONS
+
+=head2 new
+
+Open KinoSearch index
+
+ my $est = new WebPAC::Output::KinoSearch(
+ index_path => '/path/to/invindex',
+ fields => [ qw/name of all fields used/ ],
+ database => 'demo',
+ label => 'node label',
+ encoding => 'iso-8859-2',
+ clean => 1,
+ );
+
+Options are:
+
+=over 4
+
+=item index_path
+
+path to KinoSearch index to use
+
+=item fields
+
+name of all fields used in this index
+
+=item database
+
+name of database from which data comes
+
+=item label
+
+label for node (optional)
+
+=item encoding
+
+character encoding of C<data_structure> if it's different than C<ISO-8859-2>
+(and it probably is). This encoding will be converted to C<UTF-8> for
+the index.
+
+=back
+
+=cut
+
+sub new {
+    my $class = shift;
+
+    # All constructor arguments are stored verbatim as object attributes.
+    my $self = {@_};
+    bless($self, $class);
+
+    my $log = $self->_get_logger;
+
+    #$log->debug("self: ", sub { Dumper($self) });
+
+    # index_path, fields and database are mandatory; label, encoding and
+    # clean are optional.
+    foreach my $p (qw/index_path fields database/) {
+        $log->logdie("need $p") unless ($self->{$p});
+    }
+
+    $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
+
+    # Source encoding of incoming data; convert() recodes from it to UTF-8.
+    $self->{encoding} ||= 'ISO-8859-2';
+
+    $log->info("using index $self->{index_path} with encoding $self->{encoding}");
+
+    my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
+
+    # The "clean" option is handed to the indexer as its "create" flag.
+    # NOTE(review): presumably a true value makes KinoSearch start a fresh
+    # index at index_path -- confirm against the KinoSearch documentation.
+    $self->{invindex} = KinoSearch::InvIndexer->new(
+        invindex => $self->{index_path},
+        create   => $self->{clean},
+        analyzer => $analyzer,
+    );
+
+    # Declare every configured field as stored and indexed, without
+    # term vectors.
+    foreach my $f (@{ $self->{fields} }) {
+        $self->{invindex}->spec_field(
+            name       => $f,
+#           boost      => 10,
+            stored     => 1,
+            indexed    => 1,
+            vectorized => 0,
+        );
+    }
+
+    # A blessed reference is always true, so the object is returned
+    # unconditionally; every failure above has already died via logdie.
+    return $self;
+}
+
+
+=head2 add
+
+Adds one entry to database.
+
+ $est->add(
+ id => 42,
+ ds => $ds,
+ type => 'display',
+ text => 'optional text from which snippet is created',
+ );
+
+This function will create entries in the index using the following URI format:
+
+ file:///type/database/id
+
+Each tag in C<data_structure> with specified C<type> will create one
+attribute and corresponding hidden text (used for search).
+
+=cut
+
+sub add {
+    my $self = shift;
+
+    # Named arguments: id, ds and type are required, text is optional.
+    my $args = {@_};
+
+    my $log = $self->_get_logger;
+
+    my $database = $self->{'database'} || $log->logconfess('no database in $self');
+    $log->logconfess('need invindex in object') unless ($self->{'invindex'});
+
+    foreach my $p (qw/id ds type/) {
+        $log->logdie("need $p") unless ($args->{$p});
+    }
+
+    my $type = $args->{'type'};
+    my $id   = $args->{'id'};
+    my $ds   = $args->{'ds'};
+
+    my $uri = "file:///$type/$database/$id";
+    $log->debug("creating $uri");
+
+    my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");
+
+    # Setting a field is best effort: a failure is tolerated, but it is
+    # reported at debug level instead of being discarded silently.
+    eval { $doc->set_value('uri', $self->convert($uri) ) };
+    $log->debug("can't set field 'uri' for $uri: $@") if ($@);
+
+    $log->debug("ds = ", sub { Dumper($ds) } );
+
+    # filter all tags which have type defined
+    my @tags = grep {
+        ref($ds->{$_}) eq 'HASH' && defined( $ds->{$_}->{$type} )
+    } keys %{ $ds };
+
+    $log->debug("tags = ", join(",", @tags));
+
+    # Nothing of the requested type in this record: the document is not
+    # added to the index and an empty value is returned.
+    return unless (@tags);
+
+    foreach my $tag (@tags) {
+
+        # All values of a tag are indexed as one space-separated string.
+        my $vals = join(" ", @{ $ds->{$tag}->{$type} });
+
+        # Tags whose joined value is false ('' or '0') are skipped.
+        next if (! $vals);
+
+        # Convert into a separate variable so that the error message
+        # still shows the original input when the conversion fails.
+        my $utf8_vals = $self->convert( $vals ) or
+            $log->logdie("can't convert '$vals' to UTF-8");
+
+        eval { $doc->set_value( $tag, $utf8_vals ) };
+        $log->debug("can't set field '$tag' for $uri: $@") if ($@);
+    }
+
+    my $text = $args->{'text'};
+    if ( $text ) {
+        my $utf8_text = $self->convert( $text ) or
+            $log->logdie("can't convert '$text' to UTF-8");
+        eval { $doc->set_value( bodytext => $utf8_text ) };
+        $log->debug("can't set field 'bodytext' for $uri: $@") if ($@);
+    }
+
+    #$log->debug("adding ", sub { $doc->dump_draft } );
+    # NOTE(review): nothing in this module calls finish() on the indexer;
+    # check the KinoSearch documentation for whether the caller has to do
+    # that before added documents become searchable.
+    $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri");
+
+    return 1;
+}
+
+
+=head2 convert
+
+ my $utf8_string = $self->convert('string in codepage');
+
+=cut
+
+sub convert {
+    my $self = shift;
+    my $text = shift;
+
+    # Nothing to convert for undef or the empty string. A plain "0" is a
+    # valid value and must not be treated as missing.
+    return unless (defined($text) && length($text));
+
+    # Same default as new() uses when no encoding was configured.
+    my $from = $self->{encoding} || 'ISO-8859-2';
+
+    # from_to() recodes $text in place and returns undef on failure; in
+    # that case return nothing so that callers can detect the error.
+    return unless defined( from_to($text, $from, 'UTF-8') );
+
+    return $text;
+}
+
+=head1 AUTHOR
+
+Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+1; # End of WebPAC::Output::KinoSearch
print "config = ",Dumper($config) if ($debug);
die "no databases in config file!\n" unless ($config->{databases});
+
+my $log = _new WebPAC::Common()->_get_logger();
+
my $use_indexer = $config->{use_indexer} || 'hyperestraier';
+$log->info("using $use_indexer indexing engine...");
my $total_rows = 0;
my $start_t = time();
next if ($only_db_name && $database !~ m/$only_db_name/i);
- my $log = _new WebPAC::Common()->_get_logger();
-
my $indexer;
+ my $indexer_config = $config->{$use_indexer} || $log->logdie("can't find '$use_indexer' part in confguration");
+ $indexer_config->{database} = $database;
+ $indexer_config->{clean} = $clean;
+ $indexer_config->{label} = $db_config->{name};
+
if ($use_indexer eq 'hyperestraier') {
- #
# open Hyper Estraier database
- #
-
use WebPAC::Output::Estraier '0.10';
- my $est_config = $config->{hyperestraier} || $log->logdie("can't find 'hyperestraier' part in confguration");
- $est_config->{database} = $database;
- $est_config->{clean} = $clean;
- $est_config->{label} = $db_config->{name};
+ $indexer = new WebPAC::Output::Estraier( %{ $indexer_config } );
+
+ } elsif ($use_indexer eq 'kinosearch') {
+
+ # open KinoSearch
+ use WebPAC::Output::KinoSearch;
+ $indexer = new WebPAC::Output::KinoSearch( %{ $indexer_config } );
- $indexer = new WebPAC::Output::Estraier( %{ $est_config } );
} else {
$log->logdie("unknown use_indexer: $use_indexer");
}
)
);
- if ($use_indexer eq 'hyperestraier') {
- #
- # add Hyper Estraier links to other databases
- #
- if (ref($db_config->{links}) eq 'ARRAY') {
- foreach my $link (@{ $db_config->{links} }) {
+ #
+ # add Hyper Estraier links to other databases
+ #
+ if (ref($db_config->{links}) eq 'ARRAY') {
+ foreach my $link (@{ $db_config->{links} }) {
+ if ($use_indexer eq 'hyperestraier') {
$log->info("adding link $database -> $link->{to} [$link->{credit}]");
$indexer->add_link(
from => $database,
to => $link->{to},
credit => $link->{credit},
);
+ } else {
+ $log->warn("NOT IMPLEMENTED WITH $use_indexer: adding link $database -> $link->{to} [$link->{credit}]");
}
}
- } else {
- $log->warn("links not implemented for $use_indexer");
}
}