1 package WebPAC::Output::KinoSearch;
6 use base qw/WebPAC::Common/;
8 use KinoSearch::InvIndexer;
9 use KinoSearch::Analysis::PolyAnalyzer;
10 use Encode qw/from_to/;
15 WebPAC::Output::KinoSearch - Create KinoSearch full text index
23 our $VERSION = '0.01';
27 Create full text index using KinoSearch index from data with
36 my $est = new WebPAC::Output::KinoSearch(
37 index_path => '/path/to/invindex',
38 fields => [ qw/name of all fields used/ ],
40 label => 'node label',
41 encoding => 'iso-8859-2',
51 path to KinoSearch index to use
55 name of all fields used in this index
59 name of database from which data comes
63 label for node (optional)
67 character encoding of C<data_structure> if it's different from C<ISO-8859-2>
68 (and it probably is). This encoding will be converted to C<UTF-8> for
80 my $log = $self->_get_logger;
82 #$log->debug("self: ", sub { Dumper($self) });
84 foreach my $p (qw/index_path fields database/) {
85 $log->logdie("need $p") unless ($self->{$p});
88 $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
90 $self->{encoding} ||= 'ISO-8859-2';
92 $log->info("using index $self->{index_path} with encoding $self->{encoding}");
94 my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
96 $self->{invindex} = KinoSearch::InvIndexer->new(
97 invindex => $self->{index_path},
98 create => $self->{clean},
99 analyzer => $analyzer,
102 foreach my $f (@{ $self->{fields} }) {
103 $self->{invindex}->spec_field(
112 $self ? return $self : return undef;
118 Adds one entry to database.
124 text => 'optional text from which snippet is created',
127 This function will create entries in the index using the following URI format:
129 C<file:///type/database%20name/000>
131 Each tag in C<data_structure> with specified C<type> will create one
132 attribute and corresponding hidden text (used for search).
141 my $log = $self->_get_logger;
143 my $database = $self->{'database'} || $log->logconfess('no database in $self');
144 $log->logconfess('need invindex in object') unless ($self->{'invindex'});
146 foreach my $p (qw/id ds type/) {
147 $log->logdie("need $p") unless ($args->{$p});
150 my $type = $args->{'type'};
151 my $id = $args->{'id'};
153 my $uri = "file:///$type/$database/$id";
154 $log->debug("creating $uri");
156 my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");
160 eval { $doc->set_value($n, $self->convert($v) ) };
161 $log->warn("can't insert: $n = $v") if ($@);
164 add_value('uri', $uri);
166 $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
168 # filter all tags which have type defined
170 ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
171 } keys %{ $args->{'ds'} };
173 $log->debug("tags = ", join(",", @tags));
175 return unless (@tags);
177 foreach my $tag (@tags) {
179 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
183 $vals = $self->convert( $vals ) or
184 $log->logdie("can't convert '$vals' to UTF-8");
186 add_value( $tag, $vals );
189 if (my $text = $args->{'text'}) {
190 add_value( 'bodytext', $text );
193 #$log->debug("adding ", sub { $doc->dump_draft } );
194 $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri");
202 my $utf8_string = $self->convert('string in codepage');
209 my $text = shift || return;
210 from_to($text, $self->{encoding}, 'UTF-8');
216 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
218 =head1 COPYRIGHT & LICENSE
220 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
222 This program is free software; you can redistribute it and/or modify it
223 under the same terms as Perl itself.
227 1; # End of WebPAC::Output::KinoSearch